diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index d9c4ff1dd7..a51989e119 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:10b49b1f38b63921f54a2b6fb1871b44b7b02817a8644fa3ba8f289c30d628b9 +oid sha256:7e0e2b774a6c3cabb0e060ed69e2c8e2c6bbe1764bbbaab38d36e347e3bb57ce size 609323 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index b527967785..6fe9a47b24 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f13e52e591136fd6b02ae0f84f941a6e6ac0cbcebd1119184e74cee0f507061 +oid sha256:83194aa60840a949f84496bb9f27659d4694d1e635935330d755523c36b735a8 size 545131 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index c9375e399e..bd66e5fc49 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:311255de87c190cffd29c018a2cf033adec4cc4e9c2e380db31880bc8ec303c4 +oid sha256:458b8def069e59c69b6b9fdbc906eb3a245ea139229d5b2385c27a5ec5ae18c9 size 598271 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 8e5aca6e81..97930eeb5c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3599b28315eb62a0e3975db765d1c2e2b3745f39b77e5705191ae1d66fe2ab1f +oid sha256:97589b590015dc0349dc53d2349e76e5f747267310fab94600d0c4b6a22486cd size 534081 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7d73cdeba0..ed734986c3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4465aafa05f0bf9201d0c774b660c688698bcc34de36ac219f7b24f940e8f3f5 +oid sha256:62fadf416568ed8c5be310a53ac251a3488c00a2308b3cb0f9048d42e95e719b size 460315 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index cddc9310bb..a2cefe89e4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba95767123f4f48aaa72f746f37d9e47493ffb242dfde6f3f53006c05b8a1a87 +oid sha256:1efe0fce91ba26a5cad02bee190c24be2d6751b17d144c3f9a4af12931a3a39a size 429011 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 10a34ed5f5..ecfe7498e4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:38d9f342489c9fd28f1e7f3b0561ad4339dd33b113d401032c74d77ce7ab02ed +oid sha256:1424bc3198804ac37471502af729d5ed30b4088c168bd45316342c3e2ae2d7ff size 449579 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a080400ea7..3348a43876 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:669c83197784222284480e5205da341fa0991c7be1a0c789554e397abea57fa2 +oid sha256:1cc5097176a862183eb1fa18d773bdfc245d9376744d28deebb7450ac7340e55 size 425403 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index ee37bf5098..cc7edd1964 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:63e1b58009fd22bf14f1bfca117293f2adc2b806d2f283368b6e5c6a5f22e754 +oid sha256:a4b4d0561696d5728a80cd24af358c63ab78dc5b2feac10d43767236ae331547 size 605373 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index eac4a35358..96e8db6c12 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:636a36a55be3020aaa50fb85df56365b5e5881e711bf5395e731c58aa015fcfb +oid sha256:801e9f9aaa027e98e7466db1e5a71ceea509e7faffdc5aeb9debb91b6db8f89a size 543625 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index e68e61e3d2..1139ad1441 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83d02465efa09be36e50a8e427160b34d714ec3d25548aeca568cbac66c7c17c +oid sha256:f5993b141d7cde0f588f496e8bb89df22d2ab6c359f3598077b9241747156ad4 size 428597 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 66d15eb750..0d7aaba7a0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:966b306fb7b339d5aa3b3cd376d5b09c6986a0820b5be5cf7f98ea4de34e6810 -size 369833 +oid sha256:bc6584aed823757456b163a4140ea1660815760ff4f966fd0c36e32e06bff9a0 +size 370623 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index cfa8f57fce..07e917327d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ba84096554f56c034c631042bda0d5e43881e23307e03522d052731b5ab2b80 +oid sha256:39d3d3da78f96164b8245e2ded87d2d5dc831ef5d1ece26cecb3c3aad178a79b size 412315 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4f1035bf3d..560784beb2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1d853f77d981e547e7cbc0577475a9161437d403ee337d906ff072e33a316cf -size 353551 +oid sha256:c70b84adee9466978f712c974654b53ca4d21eca3ba4ee7f3763a945679f4208 +size 354341 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 226370c4c2..9d2f9abc61 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:29affef27b333f6049b66cbeb0f1a56b302001f29a91818608a7b34492a16e1a +oid sha256:056470956e537a019ab0019fc5aeb0b3e45814e8ac1c4f2545d79c2bacd5752f size 449263 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c96ed1fd06..f4a32cd6c1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd5f19344299f9b1a168c24585c23b7a07e72a581118366deef274ddf7a353cf +oid sha256:6058dc20904d61781a8fdeb49a96cbaae4d4b0bdbc03a3b263886d19fa7d81c1 size 417959 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 99b9d8ec11..291f3b0c7f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e79ccc4e7e8099a9fee12fe73e1215f9ad7afe4a4b6af0a47a348dbf16b0528f +oid sha256:25ee9eed542e3b6c6d09544b0a7fe77a4f3585ef9958b2c97dfd5e7b0ad5a6dc size 438527 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index be455371d0..b455cae81f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1778c374f583c9951c6a60ff46b3b98cb91a2d5a5d9bb7628df756e9972a0e90 +oid sha256:d35c5308dbd338f71e815558e58438b59e6bacd1f9953e80eb82602b0b1c4830 size 414353 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 07415ba30e..9ea150edc0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd94b7ea49a01b8027e1318b2cc8e272af9dfdab7c6031e05a4c303a4c31caad +oid sha256:d8abd45bd9ad702dbdaee33b3997f31616dd8af1a5bb847a7aeda34ef9d1a906 size 594323 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 95edb5b138..083acfb626 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a53ad56dfd8c7fa31dca0b3bda740a668e2f6540985c406a61d6b455a9c2520c +oid sha256:134105ca624ddc23f161faec9c4b5401ef2f8246ed8f0816204cb8b5f97cd3b4 size 532575 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index d5ec879bd8..ea72f8f268 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f8536975d97db6f074af85f17079158ba38bcf60ef7fa8518338b7160bfde0c +oid sha256:da228e2856d9a716df7170b35702965f07bc9dff19196df782f399dc615991e8 size 418337 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index bdc2b7895a..5400674676 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c556109f0c7c560daf60dea04b344c07a90ae088fc459e46089dc4846aeec9f8 +oid sha256:4fc99a15e9a7b7db772277ae26457e391a3d5ff452eaca28f32ec21a61d1709c size 359573 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index a60fa125fc..7e2c1c9589 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9aa72dbb4470391e97e5ae8dc7af24765dfb12421f7310b7051dd8131c63b91d +oid sha256:582516974c5b71a9794888940bbbe10f170c44e0f65a3f7d4bf1f7b048d12d59 size 401265 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 51d9cdae89..0d670fe4a2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:143ef6490354b58562f9ad5ed660f0f8494bd9f4fcdeead8f1e39dfc3759b3f5 +oid sha256:70d9d8daedd35ff863ce288284c0ebfd722665569f0b7f788d516e386720b169 size 343291 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8c9127f001..47f6bfa46e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:80a712d8e44185f9bb2ac294e7713296730122ec47140a4b4d546449f0bfa04c +oid sha256:9708057344f2813105b824dcdc494f5e05c9563d2a9545cb51a9aa0c5ab47761 size 479293 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ede5abef57..06e63be87c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba7060aceef705af18bec47f4f1e1dc1edfd1a8b60f0f49e67953832271c4f26 +oid sha256:a268b9b5c2319938c604fabd9ec695905bae7432d9e00ed5af08c3d8fec4da31 size 448777 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 72009bfdbf..00de9b2fe3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:84d8810252dd4cc3386831cfabf66d0079f858d3e8402eaea6048d791b7d7d82 +oid sha256:a008063d504f7beb4a6e275f40689da982deee6608778addab4ecd958903fa2f size 469345 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 91e223332f..d9997f0373 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a92b4ea3f9f7b73a368747ec40452f156de680b022abe0ecdd15f9c5d1aad8b +oid sha256:8acbba887bfb703aa796c0984143a281026db3720bbf6166cdd871f4fe9929c1 size 443591 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index a22b304bd4..c9ba357089 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:66d13ef9dc43e61c2f63505877bac29185a753f1990dd3a59e78f6387e1d6407 +oid sha256:c7f2258a99b512e91b896a897478e8fd9af91d09b26b335b1d515b75a8656ce3 size 647342 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 70382913c5..c6a6fbc23d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:44ead407bf0c1171177e55f1ed837280da83f98a3c3626af99e8eafadc77940b +oid sha256:116e1a3c6610378ac7981f139e9bcdcbb5025d4df8c2b11a90949577aae2c08f size 583595 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 1e1a3c6b60..721bf21927 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:476ccc9850d01af0f5bcaa7e4a32fd90ddf17beb6ea9498f50631ebf31bd0ebf +oid sha256:771eb850d08c939ce268c35a8992c40b8c233b66235533a19b22ca66c1c83762 size 452213 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 34e57383fc..97e1491d5c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a15226b483660131606f0c713cfaa85179fa684666f9d4854b53aa9b86254527 +oid sha256:77982ca37706064117eb0bdc13d3deb99fa8ade3ca93f8778209633ed1dba118 size 383163 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 84364c9645..140b52b53c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9617dd28511bc1dc9de87f2132a3b4a6e204837789eabe54746d14b945889f91 +oid sha256:ab7b342275848b478e51eb310d670eb2ead7f0b6408a64fb7675736ed42ec997 size 432773 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 95d65c65c1..858afd72f2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1baf383ddbc72e2ca8c228c8dacc003e7ac74809cd004331802cb92a258171fd -size 365303 +oid sha256:42f8da62dd959b5973cf4aea5da0d962381a023b80c3269e1a6981c7ef1deecb +size 366091 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0d142252eb..d79acd10fd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b80c5d6b20ec9e745e898c90fc1dbb8712c0c86997dc0774f1735fdbbb983d9c -size 468241 +oid sha256:9720d9564adc6ff1d74d45573264964ec2bf8b2ecade6895c7ceb3c3749ab2ee +size 469031 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 69a30c1fe7..74a6c0c149 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b3213608963d86e8d50e24759ac7c26b4e8f0f86355bfd7bdc28bfbbfd694a0 +oid sha256:0b0a71145651cb709f558b450bcb2af2f845433bdbb6cd9c36e094744c8ae0dc size 437727 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 80105478e2..06e12a799f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e0ef3c0557a7c6972d2a3f1b68fd1b171c6a8a116db9e99173c81625fcaeeb4d +oid sha256:0c50e8ca669694c4adf0a2ef442e5ccf67be475ea65fec5942cc5adc102b1ac7 size 458295 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6e82ecee54..e48d98d17e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46586f6383132589cc29e7c914c5ee05f223e3c35044438b709a34768b6059ec +oid sha256:e0af13adfa5314867bed6f014dc96082f98563ca464ac8fe9dde77aecabe2e77 size 432541 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 97f94a8e4d..5dee21c26f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:478f58a590969970f47d468d8fb83c95c802ac21162d4e42c590d931f22687b3 +oid sha256:fb125cf9a68c4843e35eec39071d589d86978b1fda5b54daaf757cb80fa93f85 size 636292 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index e54edc2633..2ac9911238 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:34c0f6f9ca7969eed52434233c45d62210b0bdc2f575c8332d838c17f06977a9 +oid sha256:ce48d46a620fb43941968c1b662f7b4a3a2e708d6ac8007bde57b4edf65220e3 size 572543 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 5777315ce0..03b28cf45d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1ad0d19350d092d9132d9d5f4c05a6ea6b9b4355003de2756d6c38ed7c83b295 +oid sha256:bacd646e2029c28a362aab6fae049fabdc04f6cf460bc914f54f1e29cef4d139 size 439583 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a196a8e283..4d446ef6e3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58fdae3070d60a170b6148b0b68f8693aaaef2db9635dd109df6b87c5c08bab1 +oid sha256:63668716c020f026b8812907395cc82a3ff05ea89f258ba25cde1b136b30ae12 size 372113 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 5f6bc82f36..38d2be5405 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c21dc83cacab299237ab49c21b495f4460b3e100cd3caf1ed3e8c527ab96ad2 -size 419355 +oid sha256:a20a7e2e15e62a8400d9f40d5fb0946cbbbf96906e5e42a7bc5b6b2d4c84b4ef +size 420145 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b7dd76dbaa..3f982b0044 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1c15fdb2d2deebb49f7ad1a6c275e25213ec056936e3862d73668528c3488748 -size 354251 +oid sha256:b1deae00bfd30a89b6df43a665d3f34934f221cc98e189e51d60033a4c9f9fe3 +size 355041 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index a28d8d9eb6..ed9ea35ee0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bcc58f79599f5c2663f730c8ddb4f2f23c2d69fb3affcfaaaaf09d620fc85da5 +oid sha256:9c75d601fa7804e62bafb1dbb06c377a3d194a07d8e9eccd0276224023dd5c38 size 638998 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 814ae9f0ce..722a9e2cf3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b0ddc9f0fd7a8337f9c78fdb1c7cad753a5c9bb6ab0145cf053f999812482fa7 +oid sha256:a1eba3a874451f32716f7f283e7849e2b45fd89c049589453533515fc806ba1f size 553617 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 299f1966b7..6b2b20de19 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1797bda7b642dcdf3d967337b906b66830e4418778fd50566f89a7237446404 +oid sha256:419cf33a68bcaf4bc1c6cad48d810be7ffd816ad0fd1b3eef94bcec0344bb5c9 size 617685 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 3ffaaa607a..aa44be7c86 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:71fbe7be6af98fa27bb7cbef72a47714380e7d14a45db1f3817673f0087de6b8 +oid sha256:047335b382bd67c2262e5f8c058af1f7b3a7fc6a5e58bb72b8b962add25790e6 size 532305 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8fa51e579c..f4d8b8ed77 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff0465ee3309afcb1fd2ccf96ac3c08a733f8fbb9e4892abf80064ec8fe23393 +oid sha256:5e7b2dee7e6748c12b158c6223cd1b3e731d34b235456713b64e2e7defa68ffb size 570821 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 641203f3aa..1dcd40c9de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e94e120c7820a9f8ef8ce32cb33c89ad08495a15e8960fe49b4522a1bd65c6d2 +oid sha256:0f9b3a116151ea6aba238627d6760c5d306fea245ca77823935f2ac15b0f745f size 545831 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index fced0e5aba..3c417dbeb3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1b918f7a56919526284a594caf91da80466cebef83281f6a4b03fdbe4956bb38 +oid sha256:400db48260e6ff2bb4b6574195c63735c10a60e1e28861dc802caeb8bf24489b size 557717 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ee869336d7..7e3df4c669 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:303d055ba263763dc7490cc1f068db0ad6be9a93863084cac65f05e1cee751e7 +oid sha256:07bcaeae935460978795014e17b9b697dc6e2d1f653b866f2a53764bc51f570f size 537489 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 9b13489bd4..c31f9f62ac 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e7d844a48f9683486678226517a7e26955c3a8a41fc0bfd766aaf9cb550197d7 +oid sha256:a5065c4697a089402900408bdc0e044fd615fe6bd0ac9cd4bd1677259f57bdb3 size 639588 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index e3be320e4b..55ff4abd32 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5bcd6707c6c4aa098f4d11747b707faf7106372c1a6c46c7f8135c90e29d2b52 +oid sha256:e3ec5e0a5ba0f4f53e0cfb8aba7d33887655ba825d2df72598c8114f67e6baff size 587679 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 049ca4e8e7..66fe8c6b58 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:33faa0e4e137535c3ff65df0b5fe4e1648cbbb71fd9b83a70f6ea31f72364c29 +oid sha256:483979d9e9a46fe5c62e062aefe4e04f8820ebf3cb7b986f3249a94b4d0c08c6 size 534369 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b23c218b49..b3102c83b7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c09967cabc9c02635715ede8079d7de03116d231739c18d961d97b33a84a9518 +oid sha256:9a5716424ec1be6109841eaba73ea0e6a3bb1cf2882d78789426901157cd3a7b size 477183 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 5b24d95839..3d5000e2b7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a593de0fb293c29dfda8fe39bf3ad455784cf4f6b7f5dfcb25ead4ba921f8ef +oid sha256:0084d18498fd64071bbde52dbd829c3735e3dd23ab73d11a69bd0793c2cf2065 size 515719 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5ee3a8916a..ee4292eab7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3362cae8b93e31023e6717ec6fa996da8d4103a001badb0ad6a3ca832d073912 +oid sha256:920dd8f1288e1005131021afab361245e7ff6bc5ebfc441ae0cfe92ed1df9b88 size 459323 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index dd0b0e4e3c..0a73a3dfa1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c23fef2bb64e9d768f508bcb575efe9d30ecd2954735ea70a39f1d3142f692da -size 549509 +oid sha256:ceff7493ab713c4e0a0ead3a6d1dd4c894ed9add166885c96950e855899045e4 +size 550299 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6f45c51f7c..21d1c753d0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3350a445b124179d3e90604964a0435ec0be3281cf205d28e0697e1a9165e9ca +oid sha256:7485cf9c19ab7de43f1af75924674cdf8c6af27ece1ca6e12c1e703dc77772d6 size 524519 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index bc2b02728b..962f09117a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cad0d85b3d50f3024b2ecba171e1e562dea75545a49569f8a46245033f82d5cb +oid sha256:73eee78418a2c922210e9063ca10bc3b51107fc5aacdd273320de6e301d970ba size 536405 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3339632c07..32df009fc5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8c4922d86c7cb272d74b5e66698d6f2025f8a5f2fa9db016fb204ef068540909 +oid sha256:499b741bab50a4cae71db27a6a275c3ffe8708716d8ad3cceb811d67c69b0973 size 516177 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 38b047c728..db00fa64be 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:347a9d3b24fdc8e56d8dddde47512c346e5d00cee0ff436ef38da78750da556b +oid sha256:c3d5cc8682a111e81fa688af081d70f268d9798cc5d7cd2036098af8c402a0f7 size 618276 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 2f25ac14ad..7637258d5c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d4812d1add32918edf19bf1875774be021b71789e2939e7fea96f03faa218f86 +oid sha256:f8bae3b4682e75004584192ed991be710abc5fcac2f0eb7166ce3283ec430e59 size 567157 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 16fadca07d..e660699f2c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6ee77bb8856d5310a8f7d93c6ce7945c18a9af45148956834ea8e7f9221b3c54 +oid sha256:2c8c3fb8697b5f82be3d3617f2b281b76db12209c0a19719a7d0ddc619c58b34 size 513057 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a8d98f8093..6f270b5dad 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae3e7b0e891747937df6b5a262345620b39a31f0c110f6e6d3c5b8a02f8f820e +oid sha256:e2af58295e357cd025c2d41d916f94d78ff4b9c2241f80b1d5c6e6dd9f427764 size 455871 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index d89276a098..3229aac671 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f2a75869b9c2ebce30062af1d4314b6edbffc208451aab2947c1cf1e6ac3849 +oid sha256:0c3c0492a243211464328106ff758dcd8f49b70b1788175b83ba2553fa32040f size 494407 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index aeaa55f12e..246b00beb8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7ff2f09510faa790b143c549217aae47aaa975022424b6f699338d510862bd42 +oid sha256:c7be04907db7df6890b8ff47477d92537e97a060b2a15e716f74b961a50ca293 size 438799 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8ebecd5cbc..dac133cd41 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:013972d9826a497a9bf35807008f5b9b92a782f82df73f4b14625ebc1106d05e +oid sha256:207ce5a7b77fc1369a8065e32726a5907f831ef716f3a07c1a57f7efa1e74f69 size 590589 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c6f6f66c6e..2f10c0c5e5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:076423c4d0c4d7d2330fa1567269e15802127d3b7b02cc39d2a945682e1f5116 +oid sha256:0c95ea63c1fd0cf9be1f2d4a0bd00cbcaef293947e8bb2e649c8831730fbfe6c size 564809 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b6de225eff..4c1ce3ba2f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:431ecbe0ec09ef369d99b01b2864d39b9675e54a256108a1622c03ae419524fe -size 577485 +oid sha256:5335c20af9df1e135acadda4e8500fc93a9a6fd57f242ba786eaec29a9362ed1 +size 578273 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 10e6c7e01b..d540807011 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5fb405a4d5a695a626d04121888af294f3399e8f3502e99239e2da11427f918e +oid sha256:61482aa0efa3b1e2bc5de30371004e8e3dce895b79e64d031773d8d9c3f331c4 size 555677 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index e1b988b321..02e08bde1d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d3eee73a4a8d14e2ec167c92fb87ef3348baadfea6e76466d2d12da32b41c38 +oid sha256:89a7be0324a89c1c012aa5bde5fc27b09102440f13625e1c8cdf4c8de2b8e28c size 677016 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 3e68eb6004..aab1ec140d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f11b5d32a132af7aa8007978f399cd23c97fe56344774a99afa75444bfdb96cd +oid sha256:36ed9309120c2bb74d26fc261da0647f85cd7c578aa9e2f909ecac16e31d5cd2 size 592425 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index de5da7bac3..9d0654729a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e8681eb66029710c963432cd570707d1b2205ac2ee456a44143ceefb126abc0 +oid sha256:4215710fc336a28d5e4a8c862f4a8c65219704faad0e45fcd52496a1f931aaf4 size 557195 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 00b09deac7..c326fd14a0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:67a8899801e6e8a8df979a8393f1abcada0f9e09cae4dd6c485fbb4bca3979f2 +oid sha256:97b6587efd2ca2da444fc664f8b588146b8683b86d41f59ff836592a5fe9aea9 size 484987 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 5f664e5dfe..c3cb8948cb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f5fb830dc6a22fd1a0782d47819c35cb2e64cf9fc5d61cbd91e2322ac03e3faa +oid sha256:06ebf1bdaf037345c1ea881c9708f1caa2e9f9122cf277b173e6f45019a21a7d size 536177 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index de2fb9b108..295e7066fd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f3cada0a9cf7128f657b7d6dc2380ff99834dc2bf10c34393b9dc0ab76b4ac1 +oid sha256:d0cbd364d7223f22f63e8b14d059018af229dd00f646edaf4a83ce753622c24d size 466337 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c8baf1f5f8..e890da2bac 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:39c4a093fddc60d113ce23e4691155c9bc1fa025fa133550d613bc19264164a0 -size 569277 +oid sha256:78a48e288b5ebf69af44e0d198d712515162a775a7bfa0bde9778ee4984c6003 +size 570065 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 644262ee13..22facf389b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d12e8f2d01e0d6daff133425e0d99c5aa2a09c184859b6cb668c7ba2c9ad7c5 -size 543497 +oid sha256:455753d48f62a8c3f09ec80f12d9d6e31c6b51f5a33a249ff285a68148f7f9a0 +size 544287 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 36f5b40128..34a2126cf3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db09dffdc86e866de326256f1132de0ee240f17ae8fa1dc6a4be7296ffd286cd +oid sha256:55cc933af0d609b5f444b63bd260177165e37b424a4ed693be443e0ccca78666 size 556961 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 16aaa40cbd..4fea069d5a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8661d9e5a77151f0a94933551fcb10c8fed00471dbd28ec68420a34f5d7949b5 -size 534365 +oid sha256:958fc38e924aa8ea21a1df82e28ad2af31b258a34105771736e61fed00e72ba9 +size 535155 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index b2a11ba7ba..d09e37f767 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bbde0f9361e99a2dac19e2cf1bf6faafe9231bafb9c48ee192499e21a6b1e070 +oid sha256:f4570ab05595afe4f4a5024f4c9d1ae2b0a89a17612a7c76189607625915fa3f size 655704 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index afe9e961a4..6b8dac1cb4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa042f3cbd590c88f42a8ea5cbb5a4556d4c2a7463f1ac07946bc1a2de8c3752 +oid sha256:599119a25d21588f2efeccfedc79938505e399d6d05af78c5da0437e5d27b675 size 571903 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index cd629f2d99..36887b412e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:24855cadb9b6636a1929841426460074569832c2123d1fdce5d1aaec68777cb4 +oid sha256:2f3d00ac5deb0c52d6485c2743956c2c4d32c625da4920f74ad12c2a25955dd2 size 534303 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e47cb7eca6..ec1a183d21 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:132e678bc017b3c8d477bc168e2293dd7eee2b3684b070559fa738eb4703810a +oid sha256:a19cf3c9047cfcbaa0272699859342c4d4cb2999245858d79154fa103a30f711 size 463675 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index a1b5da7325..afe7f85748 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:341f5f57c1daeffa7a34bcd03176216d570c37c7a29e71f0c0458a21031f91e9 +oid sha256:3c87395de8198a6c252acdba4e8feb285a467c4a721f983ea1cd0018112644c6 size 513285 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index af79e6dfe6..e944521600 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ee2bc6c1b1e3be9200f20b19e779644137b857d6ec5ebf644de7e61f348c4f6 -size 445025 +oid sha256:cfa1a5b253a8ce83cbaf5f1d4a89c7c82029dd99e7b43f85a37bef9ec34fb06a +size 445815 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 06bf8d0cf3..f70f585f22 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27a5254f6d27784dee48e6f78bc29673de0b313740f279de278f976f6b9d6468 +oid sha256:850d27cac2525410314f6e9095dff09d88c80e0e5658adb4ccb1f04a45fcf4b6 size 540673 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index abada1f8bc..6903a6fcd2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb568567481b85b0d258934766887058334480149ddfd6dc3793eed05f404e35 +oid sha256:c57c7e6c626460af41f0ee2e20ae5e7f79e3e8b4309751fb0c7b557fe3d77bf6 size 482131 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 135ffa2744..18babfe371 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f664a79773fdba5c7e6a220261762188ca283b3ba0b84cdf260e47c853dbe5ff +oid sha256:2af9cd8eeda9e5f233d5927e2693d62dbec67b0127db77934816f0b7646759ff size 529623 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 8020eee2c8..134f3ef6fa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba79876073219451a7098ba544b612d22a9b36808985653c0b77ee69407a6f1e +oid sha256:3a21ced2993682127666a369714bfc6d07663d093fadd682522bbafb0d23143f size 471081 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 05279e5a44..9c3a48eed5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be2b9b6eee71fa235dd5da58341d92f7bec851761b3740a34066316eee386c9a +oid sha256:3c75ef38ea0423414ff5739740c42694aa2f36c82eb59b7d1f89b64f0a0b8ddb size 433697 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ec02ed682c..94585672d8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fba9855af0b75e1f5bd01a3ca3494eb2a41ddb514fe8c1e2a08c3f9e57b8e1af +oid sha256:5bb999a86972033e51335c25bc5e6dda1e4512d21446645bff8552644386f3bd size 418747 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index be981c1431..375f76ab1b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0b390459703e4dcdd960473e83b75d2be6160e07ff28a349dfd31aaed60e69f7 +oid sha256:dfbe2d3f7dbd2e18f0ec5df77582edcf585a6c3844821276b203bfd5a4a58cfa size 428461 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5fdea99139..46cfaa881c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:92b9cdc07b52b8ecd7c6881efec519a1a5f212a4f5a282d57b93b305ab927f40 +oid sha256:1732ea77c95feef233dba9189c850db55059bcc446031449a082098c78d1cb59 size 413561 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 79c1643753..921766146f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:986b0a17891582ae0c3ce62b113ac1b31d6f68555c7f59ee4dec014bd9cabc75 +oid sha256:7a7943be47f715e598f5ddb45e6dd4fe2f2391475d949297d7734877205eee6d size 535145 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index bb4977acbc..3dad8155a1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bf8fd7acf52005e959be90ac47128af535c42a38cf84bd15f04ca2531cb15ec7 +oid sha256:08c46f3f1873b47bf8ed31ced47ed044d0293acaaed88dc5619ad3f92ccb9967 size 480625 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 7cd11a3682..06a063f19e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f42794f17d8ab58646d0f8a21d115f54d61a00926e9a83746965725c72d1f6ee +oid sha256:172d6b82663cfa1f836cce05be75e09469610522d6ed1f6e96c06d605d325ff8 size 417743 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6a36570399..f17d0a9d65 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89001f7e1089cb1f30354535f8877477e8486975690fe74bc2833ee8d55a054e +oid sha256:ca8a1b08f9f579e32ce3af942cde2e015afa825bc3b0f1258bab53026463f904 size 360557 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 7eac9eee82..93c22552e6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dfcbcd7b3ee6c9da4f633675a2994c7334f9c0c11374cb01da00af31209e407a +oid sha256:a82ae823f653ef618d9a2de16b80ddbbb69f9f042ad0c95ac5d83f596610457a size 400473 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 101820c5f5..2d57398890 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f8802bc54bcc17133d71bbfde044063a7887592dc3a08998364a0f474a92a931 +oid sha256:2023713a5658429743c9e94f8cb6c924d0dc4fa215275fa2fdf030885041883f size 344077 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 961b8b8880..582d8743ac 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9454732bdac6c1fab715a5f34b65ebc472464b6e0d2b6425d4b366ec7dbfde68 +oid sha256:5a49a4ea4b9abd739712d16a6945ba4c68443a08e56d0e07360e8ec48d0ddd60 size 422647 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 42179c7918..8ad1271c3f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:13a57f1a0271b80a48f3b603ec0e92836aa695639aafbaed67e5925f89e617b1 -size 407697 +oid sha256:8af9a2bd1b16e13a9a7bbab695c4f619be8098145f64053a2e3ed0e224890683 +size 408485 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index eeace8eefb..826049c498 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b01542f2e3d44fa6f8ea7b87711a1ea2fa3087c07d470c134f97889141e7cfb6 +oid sha256:70b3c91acbec06a5e5cdeece9ae2ce927314ae68c9c8a91806f6b877b6bf34d9 size 417411 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b5d83dde0d..2cc7ee39ee 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01fb55425be015e7e01fb03e3d29b42dde997e054a1f7d3eb006f824be2b23ec +oid sha256:0815440cce7a056131435a5472d7c2659e1b50011b268c4fe45fbe3e4a058b0d size 402511 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 65e68f89c2..4954afb0b2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c3d2d252dda9f89684e7a40804420b1cf10219ff8b73fb3f9f8b59b8249035b0 +oid sha256:0cd8e25949ba28b9c69e33d4a887cb974e9d7417577aade1de1e7262b4b66f00 size 524095 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 5ec173ac28..7a27a5bbce 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:24465fa4ad8275d07eaea3a7712f930c0c7e3c1898b63ab63c1ce600e303cdc1 +oid sha256:83ba531934e943d116bc710302f3050787e6349df2470b10d684c2565c37b542 size 469573 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index b54139dbd4..ac578ba834 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3fd75162b236e7d9f9d2259680e355a41c149c3c9d94ffb47b7f3e8c0c713a26 +oid sha256:8a9bf77e3ef6479f843a5724c8c7425ebec4bfd8df4282e5dc5b4e93e73f6af9 size 405903 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 01bf2b92ef..c9988c9869 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:516b30e08a636f4c588bb44aaff4404e8935eb79db6e470d4f09297390834cb2 +oid sha256:497c5d6093ffbe9771c729e3d39dea42552146f7dc2be2dc4a538b33a2c2b9f9 size 349507 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index e2d6037bd5..c6e279c2be 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fec2b2b8a8cbceadfcfacf62a045f72cf34ff54ace3ccfe9622617d14e29dc27 +oid sha256:606128cc259ed45c23c0b33fc634c535f36eb73ebf7f6055aeb067e62a680b94 size 389423 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 12631a5ef0..84834e055f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2cb8045b8f6bc5aab5e808d1d92fc37f46e5628c6cd08dc5ed707c9ed4f17d99 +oid sha256:fb0dffbc1d00c988b592db2b1f04647d11f595fb1d204f4f38dc41c58ee8c44e size 333027 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8da5945560..836e2aa8a2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:84a9413256d073e7e3a7d928bbe6bc8a44ff63a9a3e69a27705c458fffed00c9 +oid sha256:50a392cf95ae585ad75a37ad6c041f7a9377c242ef5e1e83911a555aff36c94b size 454253 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2b3d40b6b8..ef481e2416 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:23cdc0cbfa7e6ac7716e65b11e3e7644dd85ffdd9c8fa9a964373fafce5e9329 +oid sha256:7c47f5413bab14a5b216fadf47cd3b34c670a0f7ae1be140c9f902778f46362a size 438515 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ff04949c53..3a9f9ca3df 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6e6133da34c815a2e9679c4cb0b61907e3ce02d371c65ab64b1c5ee74ae65fd +oid sha256:e9c8d7aa07e731d92aecefd0a543d5146e58100f09327e44b45eb123556489c1 size 447439 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2d40a2e50f..18a5175603 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:21cf6eeed45d80bda8c473eb42e1d22f8f14bee2ccfab44a33b1f57b38daa11d +oid sha256:ecb989175f2c19bd73e3d2d0035f164bb21d5dd4e3091dbb2bf5a4b26e6e56ee size 432539 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 9561a70095..81437e5411 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0034c606bd415e6dd7edce4d8936542bf22e3aaddd14e3d25f434a74ad628507 +oid sha256:0a737db3a97915fa54d1450e29525b27c4c190da6772ae197283cda43347f165 size 578691 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 340c6fae4d..ee93951870 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e3d089c0377427e03d5e50b8ab22a0ec5a884c965698ad07f8f722609edec3a +oid sha256:9d6a5065347b68c81caa7993aaadadef16105af62cdd2ff566dc809fb7b36003 size 520841 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 3e973936e8..915a54bbe2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9881ce457ada58fb2882c892ed713a637339568bbcaefe9a3c8d547f9a00a2e2 +oid sha256:6c8ef5d56f86901a2aab3359c51267ed53bbd48391ce234c7c03d707f1873c4f size 442147 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6b7441f05b..9d6b115e6b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8eaf50394a50373635e9db42ba23c1242539c3a0979c22ae08330bd3a13781ad +oid sha256:fe9e9abedcac6a7b4e05166e3cc2901b1815d9bf5994818eafc2f692c075fd5e size 371519 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 733318a210..8300a7a237 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1c2b9d3d66468506b8c659e210bb19bb3700404457eb9f227479933965636a22 +oid sha256:626fd4db5f7ef3dff25746ae059c0b67064e0cc558e305adfd088be3fd74735f size 421721 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index df7a3909ac..fb34533eeb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1c1abe7d1579c6a08a36f10ce0dae46b92e0e94f5a12d0c0fe08f1a3158abe3e +oid sha256:568c888ebd5b8517aea650aca8c9e75fe5000003f0bbc39686b3a30e636e97d5 size 354249 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 05a33bce27..234ca84f01 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5869c0207f52f52ea451a5dfde236eefe9e08a11b6bdb8860e6a913382dec6e3 +oid sha256:048e46a2cf0e9b26c80c38640303249dfa111319e638f99ba3874358c6f31fde size 443203 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 33193c028b..e8d3836f1f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fbed28d3ae4a96cacefd8863821e0a52d323a428b29401f71c91a70d2a33b931 +oid sha256:8c816ed700c9d0a9764b6c6f28728ab1c81f1fc498fd391bdfe579d4e1ce32fd size 427463 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d05cd77ac7..f9b1cbd7b6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ce07789fb1ff1e8c4610feb0ae6d6b72ca6c590138b76b687b6cefdbdc746178 +oid sha256:fca1e745309592101905931e299af598778b1a3c73d96fd77e213ae5c1a7acf3 size 436389 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 58387f1791..681cb3ec39 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb1083f7d88ab9359fa6b90f8a4ee43f2b3519e6c2e741a7e9a9547ef1d888e3 -size 421489 +oid sha256:acad5704ec6478c3dcf4e5db95754d2d5afcf31c99b7e82d523da579139837b0 +size 422277 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 36357058ac..276eda1d1b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e52f944df1908e72bd55ebee0a459d885accac7f7b633710527440e9cf53cb4 +oid sha256:eb4e568a59e85d25fa0988027b1f7bb34e68a198378e03d91eedaa6b36ea6c4f size 567641 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 7d457e39ab..c782a00f61 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4fa5877ded0cd5d995aa10cb5176b1bf8b0bf7c107a499f99afe0a78cbb4bbc1 +oid sha256:53ccb3a84f7b371cb07d3640f416950c3229fb53954e9b78b80daf9d7b445afb size 509789 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index f8e7adedc5..d78e12cec8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:952ec234fa725016715c76cdb3a83086fb2d9f563d12345f8c25b55af47db304 +oid sha256:cade34a43999694db7da14508edb960ee7dadfab854b83f9238276b904c10e9b size 429517 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e384a3b55c..57cbc7e715 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:074317a8e7d347c31e48ebe80831a588a4a8b37782abf0a87ee9c2b300bcbaea +oid sha256:f14d20c3b4b9249cc8121d545785b44ee28f59c727fc51ab313b767ee82f0256 size 360467 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index db183893fa..3672afc1eb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c88cf420c3d9d62079265e1245a34f3bf88dfef78f298454df71e607cd31968e +oid sha256:c09f98f2719c64ca751aafc2673005143fc2007aac0cd8f2a7a6b800324d33dc size 408303 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 34b7edeaf0..de30b17a47 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fadeb3bcb3790a17ee59c68e885d74afd65f38d966b0b1973a69c9fdec52fdc9 +oid sha256:5234b8832f5b4d30c6bd6dab14b10d93e6bccc0042fedbb15c18e4c1b016d31c size 343199 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index b3016373dd..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e4ed237d72dce752c284ebb5d403feca8c36f062b3aa8c8c993cea53a9a00c45 -size 698622 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 3a05cfe505..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1c4b24be901c4cd9b6d780389f8b6498ca1114ab5b88eb7c16c8fdccb38f0c14 -size 611195 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 93e6f8859a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c5c187595a006bc1d4f2aceb0d189ea1646f862be1f2b0fe5e8c857a84cd0b38 -size 725336 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 6ab67a4ff3..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:801486dbbce2177a5f3ad4dbdbac03628dc860d29e5564e9598626cbf7934e17 -size 637466 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index d3557cc107..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:898f7ef246b7bca6020b72281c53af8867bb7b3b7d7ab3e958df34decea70092 -size 695956 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 025ca9380d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b1cd4ff0a1fd6a4ae717db2330bfa750ef935cb8374a5e3068e40798436c58a8 -size 612031 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 6cd6e29119..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c6929302de905b9be452370490d3d4e2a8cbbba9b8dd4726c5c11cf29c6062cd -size 720204 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 74b652ceb6..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a6af86db7b99a2192275d2f04490fb7a9e85ad3a5b6640d722c3c036e52f261b -size 637168 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index c4b08c4b2f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6f0abb91997f0a7cbaf1d3bf52f23293c71c1b32db46fae4351c720a0039eb9c -size 764910 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 4f53d1be51..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b9161bb3aa5585a973eef031fa9ece7c47e34f2cf544753fb6bc08f5c3cb401d -size 678766 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 7889ee661a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e52ca39ddb4b680019e69938f838c06f7544fd62bc84a4cfd37569f7d7985b6e -size 791770 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 693315c2e1..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:13ac3948be921c878e9818e3a85f1405b25ff533da2685c04e6a7977c3c58c7d -size 706416 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 3db317bb77..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:714ddf7ff58be1ef80dc3ea5d40b4e0844006bc61fa4038e83010a4e6f7d4d78 -size 799660 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index bd7852fcd5..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6f4a361bd30184b458d7b0a776e284a55b5e6fd88d9e597a5a0e4f31d0125d99 -size 705276 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 2c175f3b2f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9bc3d41c4080d8b3e4509f5fe6c5e2b8660cacb4efc3bc1f16a585107766937a -size 823166 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 9bb4037134..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4a17ff684eb28919c9c24cb9a48da304d9af2f1ae855c6c346a922cec9472d1d -size 727994 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index dffda2ad5d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6617d0c431ef4167aca53cab33a8d0c980efa753e704abc735edc5b0fe6e5adb -size 783280 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index f05f39633d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8a425139b44506a2b6d484b1b2ba0275044fe547ec7653ff1cb40b6d021481e3 -size 688306 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 6cb89df66b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3ef9b80367cd1e4b358a945dce69b1dfc213b79b4e8e0e6c97f72e7095fa76eb -size 809056 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 8874452ef5..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:36edf98b29b50d7a3605f941f75f30594e27ffb9f6b93a80a56de9e3ebc9a5a7 -size 713688 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 3f1d8c5447..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2cdb8eaa427934f99b1ee0f5c5e64a7b594412817b3684c8e6d9bf46b246575f -size 795366 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 420218f066..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:99e3328549088c4ffadb7d9d88355be36b3356c8f7733feec5440a41931401d1 -size 706754 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 8509617a68..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ae3202148a6b2dbe1ce80cf6ac3610ff5e728ed6e50caac3f337de7be39799db -size 818872 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index d8f87e8266..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:72a4c7be54103bc4f3543d314551c26fd68de9283b7b1ea1ee1204708708039c -size 728486 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index c7eab6aeef..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cf6fa1f657716db2de6ce37c07ef2d60b1c5f1f253161b6bfb7fa6561ce95d0e -size 777902 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 617f304ce6..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ed6e4eeb163d91579fc13b1e81d3990c0cd82c0a0c0275cfd1210d3041b088d1 -size 689982 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index d19ab8efec..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bf456d811ce4a74e29f8f498a311cc2ea367daeb693fe812ee0af33c5520b6be -size 802494 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 51d7bf91b4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:db03db487e367b83c71d17a58ea23e361bb21d3b6b8c1ab555ca197afd9ced49 -size 714228 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index ec991ed9b3..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7d90bda3a58e98bc706d121d8020a73e93cdcabe56774d8a47e3d0002e29440e -size 869004 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index ea592f17bb..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e43f8f66b1980173af7cced90efa5cbd02e5e0fa9a0c24f5eb140d659ec33a96 -size 772748 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 4bffde6cfb..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:14f02e950c4cf021141ed6e3d85655db897ff74dd94e7b9b4cac55d8c6493aac -size 892808 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 2fbfd8325f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dd6bc7a8401ac0f4b5fbc61ef529d79ea2d811b3e8b484283438c48ef08ab1a9 -size 795712 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 2b33171421..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a521f4e7559a646ccb0abd8d3e440b1c4ad1c94d5faeaea8b456f137013f835 -size 852724 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 5cb2239da4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:010ad3879bc3eb41afd13f732109cce8d297888a72067339e41bc991d2096ed2 -size 755974 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 93004c257b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:78aa9f27e352989a88898b0f0a7daa9ee13224682c714da6758b45fed3895818 -size 878698 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 5d017600dd..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:af1b4e8c20e7d284113d85fdd201804085185e3a9b7cb36185cdb6b3a9cf9ff7 -size 781404 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 63fb9711c9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b8d0cb178d59aefe19f6bfbd4f915d932fe3ae1c9ad46ce4a25f1d96a5c9196f -size 635082 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 93751a21ad..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:813a773a79eb67987596e50b575670535f777f73998fb8bf98024cf26e20400b -size 537885 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 7c56484c14..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:94dda201edc05e0a10696856c8b1ac3228f275e8e4100d131de9e5ad9153437e -size 633500 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 27c416841a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a272e9c49146271175ffd70aeca8cedd7a9fdbdebf7410e4fbf629b5bc07f449 -size 553473 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index a3eb356f6c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f82aa69c17cf9082eaf0cbf075c9837e191c7ba16372aeff9531cc02082a9177 -size 702206 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 8213d04c58..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c409189f8146bf3eb07c409729527c004fb424c2ff60fe294306a764887d115b -size 606195 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 1ea1f484b1..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc35f7e961aedbba898c656de9616e5a2c91a8b89816223f80723a984f10545a -size 752200 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index c74c5283a4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2a347f9f95cd4e61c8e6eeb483d0fe33b4de4a5e8f967aa2e3bbdbefa1dfd5a8 -size 646472 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index a7d6cb9ed7..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:474486e5beb78dc10589e61db1d7031c43367860757a5695eec719f2bd7b8804 -size 724376 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index d337d97be0..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7e0be8b900e4bb1c95b36c26daaa4715cbe597e0f0b7a6f258734bd1b317b0c5 -size 619436 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index b75f3c0257..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:78fd0afbd4434057d027b296ea0293e35a6e2593d6bc4ad1d3a284b5978854d9 -size 746328 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index ff3bca9e3a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b9d691fb279a6a646bddc95d58879fd230593a156340db16afe060bdaa555f4e -size 664328 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 56febc733d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8ce45b13319fd56e7133e6949b2358641bcda5b70d4e320bdcced734c63716a2 -size 718504 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 7952dbffa3..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:13ef3587f5df36a30544ce2920405faff0a13c83477e4841735595245c3b15cb -size 636554 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 4846a663d3..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7c8bb259480eb4cd3c843a77cd1de6317891c54c1cb4f66bcef005b382bd8cc6 -size 817204 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 5b7d21dfd9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7b9f5b93042d1e788858a1c450e61665b6cb2cd0860f5f3a9c636403e4ae412c -size 714632 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 3eaf6c17ea..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1cc7bc1d72ff72c0ea0fc6e507c7c8cf02e0881042ab077fcf74c20381f4197e -size 789380 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 76154fcbd0..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4ac3bf38bcd0d7807904aba5b0485e3cc5ee3d2e8fd946cce1c75f54ae265a2a -size 687648 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index e892b77aae..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ddee03adeadc633e3832305cf44f8b097282b8e8af31c097721f612a256006c3 -size 633254 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 587890c5c0..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5cdd73b1ad9ff4d6499027429a7cf3735d30013163699be7268c10cff63e7a4b -size 549527 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 46b35158f7..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0d9f990e1a2ac77fbb145708852a950d771aaf040f0f6bad6e0d1e21cd5cb2b1 -size 667712 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 36f68f31d4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9b99487b4c3fd6352b31798f54ee7e4bc3c34f9b23a29b276069444dc1f47577 -size 584083 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 460c4d22e4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e2b09df61c8ba53b1dc3e15f087ad41eca321258677122f157274ff97a6b743 -size 630686 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 176d29159f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1e1f1527857605d1330d89e712d24e11fda199b204a6da791e04e90b6a6088a8 -size 549229 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 9e7f54a596..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d726153bc09c9e1115989ddf19715ea0e90694cd80ded27e39ab55f6c2557463 -size 665836 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 7436819895..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f0a9946a2a4745e4f5e8d06d8d57ad7f6af0554f83518fda8388c92c478e4487 -size 582749 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 788925ddc2..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:04992ce7ced3ecfbfa4c1dc7f36150f11887e694243f6bcd94d105b8cd9d2a7c -size 699540 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index c8333f58ca..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c2db66bd04f4c2e4f0be02ccac72de1a873ebc25489d9beb7bdd4e260c4618df -size 617639 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index c6d918660f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:83bbf86cf6249b719629f173583a8ec22af84cbca88f28b11029eeb70b4477be -size 734936 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 08ff438927..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e9afa9b774bf59566d218bd7875612643b17d77be46b259954922ba06f7cd678 -size 653036 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index b5c8a65bdf..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6eebcf618982d85e868b78f1fdbbb78bb999d45ca8318dc86974187835839fa8 -size 723930 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 341fc391b9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f0ac2f8db6e60314e16903059a1e060b95c7bebb1be0f5f56265e6ffc5578f21 -size 629548 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 4d499f802d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:17d93b8ded8520562fd6b7a447ed337e1c65225a867bc5d08da45f3c277a1a78 -size 753160 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 50dfc443f9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5f7f0d1940731fc31891c79322adc01669854932323c85698779db28264d5ffc -size 664500 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index d3c7470596..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4c4669d23bee9400aa487e59c76c4911d805bd113ac7cc0139c5d286168b6f98 -size 716778 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 5fb8edb7ea..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e1abf8680b21a1ecde63f831180be925f8ae7b6d80f526d6dad4b5ba4bbe50e -size 622394 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index aea6ece127..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b003d825ca7e7614e73e55ff12345848bd4e50fdbdb0b1427ce4c3de54aa451c -size 746056 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 52ccff845d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:992e5dc768b1078ffdcde53a1f1e6c5369f2c88b9b34b34fb2224244cd41992b -size 657346 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 40c867f6b2..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:20fb3a989a5b3de5a350f044870cf923e80907759d33354ea959d44e3c5ff205 -size 713372 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index d118c633b4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:914dc729fd570b12c117fe6b6c0793f8baca847a21c50ba59e5d522127325bff -size 630336 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 7e41e094f0..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8c630d84c5596035010267f6c6775a46342beb72e5f674ce9c2e15a72aff90d7 -size 748964 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 36b9a846f6..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:026752928624328f9844dfc1a9f55b174235a0f78b83a7b1c99036e1f0d29d1f -size 662426 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 1ea1d27cb2..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5d046032a45b7249fcc534ed160fd876a1402bfab688659c49fdc344a2b3b5ab -size 706316 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index b6a2d001d9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:70dde622cb48f4d8a04275b8a3ee982b978a891d0555212e2eb9d5776d273b45 -size 623182 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index d4c7cab9d9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:50414dcc776e553eb84466feca2d38bef662fed9474111f83a9fc63d2c98d457 -size 741960 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index b72b1ef295..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:477499ec5bf2a67847337ecdd2499c0a27e0084efe11f7ae4de11b5f18a46df0 -size 655322 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index e452c2e7df..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8d8b07799ad73b8cd1d4a67a66bc0000b97153e49fdaa9eb006cb87a37adb381 -size 792882 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index ea7601930d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:68b6c9fce05345a1efa15e16569b5565412ca4dd53b855a71b6e62937a038008 -size 699238 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 8bf6e7e7f0..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f87941ecd42570aae1d828cc427922b2399d0a28909739aed23d6b06c97100cc -size 822258 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 2352592435..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c36fbecace92b815db5fff45c7fbe986be4b2f485ee4a21844ba6fe2bc56eb24 -size 733846 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 7f19c96c8e..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5c6738429ccc5ec716f045f29443f538a15c0c4ead5422df70f7a621303f28d3 -size 785728 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 08ba986de9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:705c9558326fa83d364879ff8a6ae87acb3f987e80e95d7526843a20803631a7 -size 692134 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index e86fbd1b9c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7a38ee437ae948d6e97f6443f1b4a4e5803160cbbc228af412476097923c0310 -size 815106 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index bcf48217c6..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3080471849f0625c5870e6e23de7d156d4074e06ee70b77ba78ec59ecbda96b6 -size 726692 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 418c036788..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a8b064d3f556068839c02a8e8ad87500f11de3b8511d2cda19fa19877fa84fd4 -size 710230 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 7ac5a606f6..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5bea1be3404b17f524dabe75fbdad6ea3d0a5fcfaa660ef2edf4ddc594e307e6 -size 618412 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 824c060051..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f23b3fe0bae59bc4d50a2bc7742535af7f5eb05a61a9dc750c9e17b20b6f70ce -size 707762 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 3d6938845f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e1031ce0b0ef7bc5c75d49b341bb1f72203279e73ddf96181548c1aa73dd1488 -size 620630 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 741dfe530f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f9563566b5093f883e6cd15cf1a5255ebd6ff851cea7e7e1a2a12ce49417c1e6 -size 816002 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 435ec189a8..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6ee6e47c957773890b6940ce454f5e898ab7f573f77fd5ed2956afac4da51612 -size 723790 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index a7e7caf77a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d3f5a1def0be28c3acbf0edd16652c3ddcba2dd2013b9929a5e148286249c6dc -size 796960 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 452d63264a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:742286d4bd3353ea033600db28d2cff72a63e0c4012055991257fe2d64d3a8c8 -size 703120 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index b61301ddce..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:83b0d50c1633542f31510f2a948e8a6ae8787604d6ecfee0e955e1efcad5977c -size 814028 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 404cfd8582..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a706ec11500fa8c1027f0c111207827dc2b55f8b9cbe23e88cce4e71bd2ad242 -size 742288 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index b5ba8ae0bb..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9815f9d51a650ca0ebb065dbf55549aebb9e9fa9dff0397c27a22af8630306ca -size 792666 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 8b6a71d642..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:20f2b88de05e6538a68826aeac35464ff0e37994fb77745433d5787b10c7a957 -size 715156 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index d59009cd71..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:05775f29e51e0857f033417246424e849047e3f0be8eb0c5f991f45dff3ab793 -size 710234 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index fe513dbda9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bf997134dd692f87add687799d2da1562d4ef330619dae5a633eb25543dffc0d -size 619206 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index b9c4a122af..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e484418ce7ae5c212eb1e2cc4a944197cbe55a6b57ccbb7c614ba3078ee2abf3 -size 708554 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index f850c3edd7..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b5f833c32ea8cf55f5b881333601377be108f27b31830e0e705dee6d9f9356d3 -size 620634 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 46222eafe6..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3da8af35aaee6a355d7c3ee21f4287cb616da0e2b8a8d99f51d30c8c1cedde16 -size 642030 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 2f85d40c86..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dcecd5ae5973bfe563c95a3ebeee7dc3089048ddfd0d832bf50dc14550aa0ba6 -size 559437 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 459f1fd3c6..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a1c38e7ebfe5938015649d65241646a9fa55701e2e110711a5fd9f9146757ef7 -size 664598 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 8756600a19..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bd48452f36c7437403ec90311c99bc6a0ae1a9b1d66a9e3c168c5dd6975d4bc6 -size 580525 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 08f7022650..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fc01b865f27da4839676c2b9111fb80e115371ef2695f42019db51dd5c022c18 -size 640498 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index b5cbf54f81..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:06eb77ad75036049becebc6793c88b051c4a94813bb1a2a7d0d3bea418d3071e -size 559779 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 809c9fb020..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2557769a629791291a2c4cf13d05bad7131228be3190446dde0f4990d354e0e3 -size 663956 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 372cecde08..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:221c4d9dbe931ae8b3396a31ab49a22f9858fdf02d57d5d0e737291e408824c9 -size 581313 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 27c1b3b9a6..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3222cb393fe0195262a1b297c0570cf2287c6e849d85e3767e8e21556543cd34 -size 708662 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index c628ae6e2d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:498ae9d9d40dddb0854dd00a253422070f6f3bc46db607d76d6c727885d58337 -size 625626 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 28dac01a2f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6cca7b8dbc9db7a32c59aa8fdef9b3dc77013697a2e57bc718cc555ad3a5b2a7 -size 731034 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 8997678dc6..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9fbd00a0dc23d933b0c7832466d590aec60d6227379a0f5e6a83e59795d5fdf5 -size 649478 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 1d98a26176..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a8bbcf7baaea25316773a7be648087ad2d475a3ccf0d9a3bf0570dfee1a09ecd -size 727724 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index bfa1e3dcfa..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:42e19dc202a76d781caa803eca2183a42d8f442ac3f2f553df4b86cf91bfca8f -size 640248 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index cf6bcf0b0c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ae3ee809d531ccf496f2126a65668c91aff88f2cdec59d43de21efffce454f19 -size 749848 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 6e7f46b925..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:802863f4d16835ccfaa2bba8a6a8c81c4bcfd775a8da376afefa6b42487c85d0 -size 660844 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index ff8aef9420..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4dcc802acb82c3a9eb3589391f78ae3ac16a674aff12061182a76e7ee936ffdc -size 720570 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 7c2f4d9cad..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7071bd2f631686963c2ecdbc8b29b6ad0992671841207849eff5a32ce7522f95 -size 633094 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 07dddb3527..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9eae3fe34724ec5147394625c4ad2ee5138972ef7cad68c91f4cbd94d57d1d56 -size 742696 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 3e479960f2..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6fac7780eec450af2bd88301b73cc9af9384334aba71df9cc339365d119723b8 -size 653690 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 256d9c6486..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e27445b7de4bced6de7272e760719c702d4c24317d0a7726f93699d8ad06e0ee -size 723874 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 55a2f8963b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38f2041356d90b7d18a372affae561c4264a17cb2975a3890e6d204bed21321c -size 640886 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 57c8a584b4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:26d43319e4b15db713b1153b8ddb4b59502607feb2d6ba752dd1d7ef2504c5ec -size 746936 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 970dc4827c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:851de5566a11593408c3fe5f7081e7bd5bfc9f39824e9c31cf0bfd96f2decaf8 -size 661532 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index d6b17bceed..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:646de21fafb9af53af85de0cafe3b512196a6a9d2d924c9167bd1afb488cc119 -size 716670 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index c56f43be86..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3d42eecf49c2d7a4ec6bff6f1820c4dbc491c0aa8fca7ab03fb470e5bb641043 -size 633734 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index dc117e6d2b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a4182b3740f6fda7279b82c6abfd371d3ea540a1bf8e2cb5a480e2727675daa3 -size 739092 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 6b645533bd..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4d9ac64f1c65d0d5c98a0a6a7b76d9c40bcca9107d5cfe6ab7cdbd019c37814d -size 654378 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 5c8d6a0250..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:894bc9c7eaa108036cf3a63a86f677e36dd84b5d0ed7024b69f938633c68112e -size 795934 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 65c326ebc3..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:def95766b43535506e29490167b9ac108385578c8d3d2442130f64b2cc6c432e -size 706336 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 444376ba92..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9a1f1abcc098096cfa52d3735f57d0834457d0e3e2255bfa5242cf145903e6a3 -size 819392 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 34c58c3824..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d2b72cb6b3c2ef56fd95ec53714a3c651ce9cc8d90ea57be6301de2323806e3a -size 728462 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 7336d3f3b9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:41013a24bfd46615ddb9b8acec1247f04e379a352a5e2c790f100357dc89a93e -size 788780 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index f639f83756..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c362db43f13cf7d86a5a887c3996943a5d321c81a7f3ba35338a8c9a20819177 -size 699184 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index ffb3d4773e..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:77d26ef77d9c86fe55a35c921aa14d267a55d12523553a0aabe30ed116bcbed6 -size 812288 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index fba66d853d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b6e57a8baf4567e18a67431d95f2ebeeed29bbff659057d87f261c65d5ef0b7 -size 721308 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 51c8437385..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:570267e2da88b74b222d2673d7252439cfc8998d5781d2e2cbeab597da8d9562 -size 612873 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 35f51a7e92..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9b1b8dea639d2d1862f4259b77aa8cf065ab53ac95dc033fcec3f1d3d98b09e6 -size 525397 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index aa5971e6bf..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:584295c6d6db0084f918f44c7235a280aa58101d349e6bae98fb250d7d1b8c73 -size 612623 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 237e3a04d7..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:19a19d705a646501a3e7753418c4eaddf69b5b1216568fa9a283797194a35857 -size 528699 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 17ee21c061..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:635f9f2263314722a0fe329331c24adbfed8b991a17bd12886a451e5b6a0b99f -size 679998 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 5a1c6461fd..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6450e70618fdf718080baa5f8b0071db1af57e4014a8570eef6bc91648241800 -size 591881 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 8d378134ba..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5ac8180d83947811ce91454a36f347ef80e6dd1c6655adbd043668b5b5298b59 -size 701132 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 88dc04af9e..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:65a7bef21760f9dde5025c8a70cf89a1f75b1673c32746060f0e5f8962c0f27b -size 614987 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 342d5af9ed..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:277aab6be4be3afb4b255f57b1e44cdb27f925d6d38490d022a4dcca7c0060f4 -size 687616 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 9305e3c577..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:31f618b024a462c3ab36c193b6284a59939c58855ef132d0d1da638e33078f29 -size 601471 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index bcb5f041d6..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:884bea0f503bf136eb54cf5d3b514872b1256ea15c2bc075e4444e91bcd64d27 -size 701772 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 791e9e890f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:27b4bb559fe88bec137d5ce54d2a8fad79e7cba80f6c9977a1aead72e4b120c4 -size 618094 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 12777b63a1..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bab7eedf67d531ba0ff0f54f3c39ad0dfe823cdfdcb7d859674751f4b63423b8 -size 688254 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 9b024d4776..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:da9e146e12a8cb7dae7aa75f015347dab4678957abb506236ab9184856dc7800 -size 605365 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 161ea5a533..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:27bc82cd57ce80a62e105096031b1137f34197201d2e250fb502bab10f3d9d7f -size 768258 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 4a7043a84f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5affb50151606fdc866c99a6cfb2e4df8ffa7252b959e9aa6585d50d8645f10b -size 682360 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 5af58f1b59..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a25f3efc0977b920cc54745cb16420fba0aa5d898d2f406a23cac757a7b7684f -size 754642 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 18d9872086..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:798818f85bdedb0e7ed7fa9f9c4cc132e1a84b7c7292ea7c156122cc4e418016 -size 668844 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 12c989eda5..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3cc60eacc71792001026b5c9dc7813918d82d189db28e791058d21a053325701 -size 606457 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 66c839e559..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b29c8e29a59fa1f30b4c5825a246a03eb9e356bfb6f7e24d8d6f6e402884b861 -size 523717 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 8d36c6528f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fbee12a8abd4d43f4f4173fb4d5f6e7e56ea7d497397f2552f4a349acea0f5cc -size 630754 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 4899245c24..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6c834102eeda3ff370839e407e275fb4fbbb52ac3d25189d7ab1f32e4e31095c -size 546879 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 8a909ff81d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:13114a5f4e8276bc0e0c637f67774c3a269bd691bf932ea7c4ee59ada7479770 -size 604185 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index e236d8523c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:161321c48ff6237bb7849350e8ad58ec2a5fa63fc175397627872fb51a9701d0 -size 523369 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index bcf14a5146..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b724627adb9c86014e24aaff2ee7b3b3979cd422baab44309a7a0806f6a98726 -size 629222 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 4ddc34e557..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8b4db88e5e40c97f00bdb7f791c4e2fa463c5b3b0543f54cc9f8d113645f3b79 -size 546383 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 8a9f64b4e6..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bda291c95226423f90787f2b9c0099311b497b775c25c98b079275b8524930f8 -size 673090 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 1616909f1a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a211e1aefda810d01251631131d4ce2cf63e39f48d94a6d8968a3627377de17a -size 589905 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index a2fb27e8c2..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:58e36d9cc3e22a77b4e77e62d6bedec8d0ee8fd7580103138c1a7efd12efa466 -size 697188 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index f83ca4c5e4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:14ee7d5feebb31a5b019a06d21bbda405bdba59bbc82b9602509eff0be407c82 -size 615039 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 3bd20895d6..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ab177391ce0b9026525a23db264e14128f9ca92830e7315aa4f28e638d711320 -size 692794 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 71061bcb60..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e597b3e0aa72e27a170af9ae8f7a661d7a31dcc93ab1a35834ad834f05d37227 -size 604379 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index d264d8efae..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:90f8bfc904cdbaab1adc82e92d60873ab181eac3128c2729f1cbb29ed9127288 -size 716646 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 7f6be55dfe..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8680f351d057d48394d563b4d38cebe5d5a6a9e5e63d00a3f6f1b7086e7d7d5c -size 626554 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index a83a84e963..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:838aaca726224b186a1229ac200bade26e27692c453dfed76985d951d30dbde0 -size 685640 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 836c31d6e9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:abca5257801b911960eb0aa03e9b291b7fb424575a25fd5cd59a379cbd963efe -size 597225 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 33ccd8be45..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:98a4b21ef2589036bcd0a0b8ebd888894ca8550f23722263513d0eceef2f143d -size 709492 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 4a881246f2..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:709b415e612fe60b04150f0d4640c2a89f73065658d9370c23e0fc86904c447a -size 619450 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index ab72113acf..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:67e0613795a48ac2f19e57fd1f070c6fb29821a2f8c8bd71c79af74319ec5fea -size 686328 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 81157889c8..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3d2e02625e98e5b502032092c5079238b176775e1517c4452a81995ad22357a9 -size 605167 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 427b7296f3..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4160fd58dc30ae23c983c2daf162c73edadce9514c69a67158a2446c4dcc797e -size 712944 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index fe98dd4f27..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:da5eabcc729ed68539639bdcdedee9d5e284d847e371f4624bb46b118c79077b -size 626602 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 49bc455ebf..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:18d7841a1d3e3ab89b6421e654791b4bd5f4f48c57886eb8cead94e99a3de120 -size 679866 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 26c256444a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9905a99dc192fc9ed82cb9055031abdfeb986c20205a6613d2f28bf05ffe9a78 -size 598013 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index f67f944da3..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:84e3130ec14fd6625c790a0a4553e56fd56f8f2e5a48d834e458aa4d5e2e2577 -size 705100 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 16281e473b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d8eb832161f80c05b75def9de80bf8ab90ad2835aaa98e01c6a47691dd162a62 -size 619448 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 692d07f129..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:768dc4809a666e9637dc14edb24dcce3edb9a6d29fbc2e441b4f86fb1494e850 -size 761004 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 3aa08dc620..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2acf02fb8fa40bffa329ee1c359dca7c21bc2628f7f145e1e6bb0bc00393eea3 -size 671506 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 7ee86b36bb..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:05cb0ccc120d3bbdc4fa7fd7e1e8d06c3697c28a1e933a5f7bca65f3bb97d839 -size 786978 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 909b030c16..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:522a23a61ac4467d44402e64f7559f0c33e5c7b3e35b3309da156cbf9859228f -size 695406 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index c81ae24a43..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d6715d99e6a8b05c2d8b6d44280c86df1266bbe61da7d0b918ef37f6babe6b9e -size 753850 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index ba879b9a85..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fbda82c0d7f5309b7295b7a155391bbe41d972da9149cea0780f6b9db6101463 -size 664352 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 2c97af1d31..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:68840486b018a65129da765d7cdef90f4c417c422b4329c239fbc9564de06f28 -size 779874 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 419c1ca02e..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:71becc6e6c0f978c0d11aff4923a097c3167471000dadbd0114ff7b8e1a37f2c -size 688302 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index f162aa66b3..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bedd3c3b6e81491bf1b9f43f1666a256e865734b9d9d2dfca57da7d21152e77b -size 641106 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 36b15c52d4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:896224c971b94595b1ac3569a067a85a9b1df792527d429d3a63d932f05f713c -size 555159 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 4461d3872f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d4ddb57902529aa78524f342cb104beb6efd35eea18afefc655d026724173d88 -size 640314 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index c636445347..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8776fa8283be1a716bb2582303d5b5e3c41fa2eddd66e65acdca2b141034cabc -size 561225 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index f3588b3e53..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b9ac2b5162909f14b3057dc34f67d04d79b545cd96ff068aa60f733d43a636a4 -size 732030 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index f1f791bec0..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2cba5cfaa6d3fa8e006e81011a15931453a7e2e4fd7528728b2efc244914d22a -size 641790 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 6d7d81517f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:17a6044ab457c004870610043ea17962c8d1647750284d56e555991b7f1a8c02 -size 721718 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 1d1de2fd0f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:93fafbf01db115671cccbd4d9a1edd96959d611c388b311007a566c87e5dc559 -size 631480 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index fa4c2f597b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:69575dcc5de21f80db9a4d0cf244b388c88c4b7d22a69d01f9734b72bfae78ce -size 731040 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 3bb3087bce..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:be477d4a1fd76fea64ef2d8a19a116744e7ec960b5caeb760438e39459893375 -size 655010 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 4d2cc8dc73..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c0fffacda879da2274b8d972eeba55c4dbc9465a3e0bd67e350543c134b1a2a0 -size 720286 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 58d46fef5a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:31f5b63c83cfd356b2e5a8e8654c3ee83fa9615533074b8344cb0586c5ab4422 -size 641048 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index fc6e2be4b5..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7e5d1928f4fdf01496a0af916328ede02efa01b44261a9d3bd96459b2d397333 -size 641900 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 3dbff07b8f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:63173e2047ac5c7ab327d610d026c58e21974388eec7bbe71cb11098b14c60d2 -size 555951 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 0bd28ac219..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:43ed488f860f36316524e824395f2c3617d14d5539bd3063bb444320d4a5a73d -size 641108 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index d91c29f6ec..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a2d6e8a941e27806f0254855efa55d4406a7f6faba72f4c52373b074dcae2347 -size 561229 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 9d5c9ea89c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:173e6b7e225891e811700b5d6c17f811d7e012185148c1cd15e22929dfb37c22 -size 660570 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index badfee63f4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c29923c285e5dcd56ba10aa6179ce68c30d2491eb3fe886d229431d8d0ca5850 -size 563917 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 95d481a821..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:720745a2f7a7c470a3e824ee4469455ed1d43d09ccac96848dd6bc061ecc57ff -size 683930 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index f629a1f138..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:957600336cccb742ed43d9a3e6eaed38acf1a72acdbd7cb69ed86770b7e0d24d -size 585007 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 238a94177e..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d24ed02945c74368e770f7de4ed3663b2c99b4abf9f62e2cfbcdaa08e3a99b67 -size 658546 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index b1f8345504..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:57f4770f6ca2be5995de2eeb31bdeaba39282410e471f7666f153636e970477f -size 564261 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 27a69e5315..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:25026566a4fc693c498df8676a13358b21b156a31ddf73b55370c66cb0fa7253 -size 680376 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index bea50e0df1..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:16adc7622961028dc1e7b86df50797b578126a62663b45509ca9be727d95cc99 -size 585005 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 7b73bf4222..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:33b3cdd7193558393ecbea0f7f975b9ff76f46c5a635fc6f1a0480eaabca348f -size 728042 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 41f67d1438..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:95cddaca398fd4d8669e3e2d1d99c4c940b4be62da53ec7f55adee3953b82d1c -size 630106 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index b76872c5ab..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2ff038d493d92ec04eb9d2e3d6d4fe30cf4a546d31867ae36774f6eb99765164 -size 751202 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 0c63e00879..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3c2321936dc466cff0234576f4e1776fbd3f4396a27c70767179e5b7bcaddf75 -size 653958 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 347cc06bda..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a0cc4e58d15f1faac685341cd46a80d4b13cd3ff9eb57ad4843c71c6a3924c93 -size 746264 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index d7ea3da0bc..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3a599b77b2e09129f7668e972d110c4fd7e661adb0b29a77c29559f540602d18 -size 643940 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index ac63aa786b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e2aff66fa7e5d1f0bd4048691eee9844aa8614dcd95ed95c16ce060489af83a -size 769228 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 16c76e15e9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:01ee94402d9ad4aaf1bbe8d1cdb4d5579ef6528683f4cc1882b0d24ecf79b2e9 -size 665324 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 1e70e2953e..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f12d61051836160845383e070a7e377cb6c29aa4bfef3f24aee22ac9a72be9fc -size 739112 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 2056385ae2..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:57d7514f75b8f108cf9b8bedfcda140130950ec14d8fe9fed13e3980b1961c6f -size 636786 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 23ab4dc9af..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:79f4be23cace464a8130eaa59280ef131cc745f549e8b5206ebf26fd29ca2d47 -size 762076 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 9967abd1ff..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3fe01c0600dd89a24638a5b3edfb563337a064cd94d0ba12e2db8f1f9a0aed51 -size 658172 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 82dc92e52a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c75cd266bd3abca9c653566a9df65c20d701208a03d063974bc765729feb9936 -size 740294 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 4f1dc4b911..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ae8bfd9e3e1041f1b4fe9fe8ad392f93c0d5863881dde1cb6acd304db52695fd -size 644578 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 9395d87095..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3d7fe0ad6b2747ffca04069d16cb3f2a98feb50aa1aac606f02cdf2ed5157a5d -size 762566 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 22a48ff84e..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:788e186839fae84d0e1e0b0353e4b803e888e0e52c0074316d50c3e2bfbbea6c -size 666014 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 5deabb5a1d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9db2f701cdd6cdb4e8d91064f70bdd4ba35b93e5579b6e4561452e68393b1644 -size 733042 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 2de2dbd724..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7a4a716e46140dc00b185e26606341d9dcfb54bb1fd78da37f42dc92541c5374 -size 637426 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index dadd74500f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d553f914399cbe38d58c4a632badf9fed6cf2a0ca2e697f441c6f42344530705 -size 755512 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 4b6867a485..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ddb69a65b4bffde74a9ff16dca177587e63ffaa9aece693789c2b5804b0656ee -size 658860 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index c726fafc39..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a1fd030caee98e7692a696f11cced43d68d9ec8ea6c624d0b31cb2b92b818040 -size 815264 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index a01d8cf0a1..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1af363ea8286a845a92f40135428257aa61815da7868f0c2c810c3c472f8c1e0 -size 711064 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 1ab07a8540..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f295c8ab6ca80eba1ae969ec92dac13a802c520909fe89578e0a7d246d0f7455 -size 838772 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 84c314dde4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:895a04415d3771ab661c454dc0004163ae5960b5efefcedecad1db34c434a392 -size 732400 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 32ca783bf1..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4cffc0811f20c845e24d304b15b4e658a740cb9f82f4b9544819d9ea8340be98 -size 808160 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 51e53300b7..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f49d7fe748fe4a947f0ddccb573007207de2c81d5820c001cde5186da5d57196 -size 703912 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 415d1a7d17..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:39fc28da4f766fd99ad75f499d109e8c0cad349e270147cb2c7ce1577bf23179 -size 831618 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 04ae2b9e75..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d312f084df57dfe63308cb46d78727c0b51d144cb01eee7c2e9bdef929bdc01b -size 725248 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 769dc0c0ad..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f5784275e967924deb97ed118dfb9d46bfd7185ff06ec21eb833a575a97045e0 -size 622880 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 22b0520473..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5d0a8ac652dc32826772aa47a51495d1aa0f2de6f655d855a295878bc6e51c9a -size 535847 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 84beb87969..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:67f0e3a63a2a5953922849c04268d435d884106f4c6022d36098e28fec12122b -size 623076 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 02823be870..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2f3cdba471c014f398f85468cbf46be5a0b180912e7733da04731597452bd2dd -size 537571 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 9606db0b2e..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:47a412522cfc2dfb42f395f8cd65b2ee3aea78c66cbb51997c8a4fda954e3370 -size 685368 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index d153832588..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0d0cc39abaadd67afcd45d2ca0450657f69241c991691c536b57c77a8b01ae6d -size 601541 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 4b5178a09d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:96bafe58a0fc6396492c25079e114b8c8e006087c86eae61dfb3fa4ec7cc03d4 -size 710794 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index bf9665a38f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:be743a0bb06df339e85c2f6acfa4f8ee8505d62b63b3c140525dd5656cc99448 -size 624650 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 3518866d0c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b93806a9360f6121ead301e6c72d33d19066295a990b539d29d3d1acb4ebb66c -size 697276 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 1b74545cfa..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2ebabfd9398ff643000c930ac6f627c394480abe8935e87deb062f4bb83e5b40 -size 611131 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 7099c86c56..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:668ff2cc32d0c1c317a400923d02fac9845c246450482fca70705120399adff5 -size 712222 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 47e2832f7b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8917e8567877145b30bfcbc5eda59fb4dc1a3ab76734c3e50292c3933913967f -size 628546 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 5ba93b2ecf..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:100fd4f59eae0acc91ac3e66e0487b6122bf2346392cce150ed95fcc27cdf67f -size 698706 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 0a594e8252..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8d3a7066764711961d9f4667823d86b4b4fe286478ec9a8594c4fc3eb53fc217 -size 615817 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 6de5bad61a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a7cb6bf7ae14f2b8c22b8c45db3a1f0d76de9b635f6fb21872fef0dad7b08457 -size 777920 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index c6f2ef722a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7ebaf28a021bbeb7e6bdd4cec8df24389b440846a4669c1c50a84910f86541e9 -size 692022 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 4cbcbb2a62..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:19abe3bd04a872254f949733559a97fd461ae790652f22e64b4b4a14f2665f2f -size 764304 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 0949cc735c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:18f977d138949cb5e741bc710619cdb04f69f3d7ccbc77e51790bbf751b1bc79 -size 678504 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index c7f0eaa1ec..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4cf8c1ce89716ffc1d330218356a59962a52fbb0fbad58aeda0344d089269817 -size 662394 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 4bf2f30dc7..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c3ef4e0bb226b7f9415158986d1014a2413ab2e748a2beb6140341a97390d5f6 -size 529629 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 4edec1708c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:25398e143c9eededf3473246eea2ac16ce22946394d2d14b3c52e080523d0378 -size 687380 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index cbba8673f8..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c976b94566c4d191dd558b02228b5b6b86b77f381242f477f36d30d9ca9c0ba0 -size 552001 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 09a1c1195f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:543d7b6dbd7113ab1c4f5e69080340a4454f146d9341ab705761de8528b5e7b4 -size 668904 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 49e449ee8f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9b7b576c8ab07d564af8e2285fabd3dad871c547e18aa9fd4d5d012ef8229ed2 -size 530071 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 74516ef11c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:37e5048d808cbcefc7a943910e4efb0efef9602dd4bce1a6a6a6fe7e3c7bcb1c -size 693298 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 15486b9b59..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:067ae4240959b9f246d0e94541e5d406364eb7bd81ac60bb892724897ce44111 -size 552295 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 88459914d9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eace7e996b57ed034ddde36e4d9295da7fe23898dc4750b4a7ae0478c3fb080b -size 729816 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 918b0f44a4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc00f8f56a620c24ebfeff0c9a95b9dc4ab1083c162470c51b6b8ea1c6ce79b1 -size 595027 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 5821ea5730..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e0e60e442bc3912bd231c0e2f7ae74a74f56bc6e3c0f84901e47e2e3f5835bef -size 754654 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 5aad2ec6eb..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9abb518ac2c701b5402c21cf283d3a5ccd168ec67bc8845738f934ae383b740c -size 620952 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index b46ea74c27..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3738a738868094eb0aa43c7d3c985de6c4bb7f4d0c61c22f7ba212ca88e41d46 -size 748730 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index cdc2969ee1..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:98303daf3198f2174ab67c8e8d6ebf1644fe8901df16cdd0e5ce440a8ea5de55 -size 609501 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 993ae556d7..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:de4e0cf5577b7de664c73dc0d4760a27b204f0826d5dcad465c6ccd7e86a5d1e -size 773322 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 58d5b36d18..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e2e250e330aefc5e100ebb677d16ed2ee3d388e9c74996412328c3c15b990ecc -size 631726 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 18b233d093..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:51e97130ddb62a233530fd67c833ffc070530900207ad32dd6f231344c1557e5 -size 742366 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 03d6c5e9a6..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:371910fb80045fc6c3e05e2a684992f98b853755673ac99462ea6f3adab4789b -size 602397 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index f86e90e9e4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:95d3dda06df78c8ed20390392dd8f7abe614246738d3891b54b995119f714ac6 -size 766168 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index ab49e10a3c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:66ca9b42d69e0e5f6957f1fc1cfebb4e80ee03cbee1fc8507c9341465178e1ef -size 624574 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index ec2c2938e4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:72865d3ccec7f9b4768cfe38eee42e0f4debed091a2ade8e7bc316ff0273804e -size 752230 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index aa3b54f544..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bd8464abd563e78722e09d0eb9f09701b90cc0fd565f7680c29c90355098e475 -size 610289 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index bc5625a314..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:02366e5ad49e6570c82a9c7bc1be4323e55cfe0523e5e604057eb2a6f1354d38 -size 777020 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 638f9dae73..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2f350692d20ee658549f36ed194b0359253f9d54a96ad1ce694dd6ca3cbaf006 -size 633304 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 12512ffc83..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a09e29aadc64dddf03317f75599fcefd6628e5a5b276826e3ab279bfca0b8d21 -size 744978 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index a91c74e86e..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a5eee7b7f6ce06d8cfa48b9d618374726e2ad105f569a78103362fa94bd40647 -size 603135 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 95eea48381..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:42e6550674d46c3ff3bd1c8734da011de34f617cfbabdb776f8c0c858245c80a -size 769964 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 4d0faf91a4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:33506086f3176b229a35162cb3c0a833b7b5ffeb7f82352e7f9f1330186a90de -size 626150 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 75f297d6f4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8db70ac3013e50226ef0e4647ac3f01751b5493e9355a6270e8c9ce3098de793 -size 818518 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 9b74ff8457..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b4fa2b1ea6258132aca7004a88d17781c23fbe6d32e7706cca36742ebae4199d -size 676628 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 9e9b6e7f51..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53f50fba2fd0c1ba476dadda4d2d9d18a651ebe80a3a962145c06d020facc8c3 -size 842864 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index e5de4e1f5c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0a5d3458ce47b0a02e499c140cf3ac2083d526a5be311ccd2a09bed202d52810 -size 701368 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 07fdb05ead..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:755e4364567a9bc5ca4221598f2a4fa8b5ad2e0d094445c22cf6592c36978ebe -size 811366 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 4c2cbed198..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:669ac68efa3f17529c3bb0baf4b9074cac7bd609ac89e663fb497b225d50fca3 -size 669474 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 890f04ea2a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d7c3b3c3c0c3c14bc9a419cea86fdf3f39fcb1f9b4d208ebd854d7c413b067e7 -size 835710 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index bdf0c084cd..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:99591729e8aa62ddec293691a400b301624c96c648f684949e7431b5060248fe -size 694214 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index a8609bb870..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cf310bb361868dac5612ae07bc3a03813130ca5fb346b1fe1b2f2943dc16e84e -size 663038 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 51b5293c77..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7b291bb848492caedde443ad6a204bdedc89ddd002c22cb07b458430faf6314b -size 580297 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index c3df139996..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c5615939e6dec7d43d543148d29540a6867d8902a12810c4a54ed5ae55a4bd41 -size 686396 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 492b2e13d3..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ffb962eb05fad6b81d36a6b500e547936b09211d5f8c24c0d6458d3fae543988 -size 601385 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 8c42774f21..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d26f6d31c4c0ba623d8d2e7077e15492f9436115aef151fc769757f4018268de -size 661506 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 6459378d96..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:43cba5c6af057b3a6e79dcce8b9b81bb7af97c6ca4bbdbd69e734775233cfe56 -size 580639 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index b7bb3009bb..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f2294c08287d7823ba34b69c405ddddc53795a33516550617f2d2fabcf55a7a7 -size 685012 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 0e6f270fe4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ea846c1d16fb62b3fae791a50585689174eef48797695d193b5740bcdc73324c -size 601383 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 00aacb59a9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fba5ce0ebf910e01faed65bc3bc04930087c05d3cdcfb2bee3b591a08fb72fa3 -size 730458 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 1b06a02bfc..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4078e77fb288a609d6d04c42aebc5b6fb8935d9613b024084265fb992cf224a2 -size 645746 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 7075e0ac4d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:19272da60e47dc94fb963d82c4ad766e6f30d4e56e423bc1a0387d2bc964313e -size 753620 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 79f3d00fdd..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc0976ccd25c4144adc963b033c017df75f30e76c800f65bb33f1cbb4c8f7218 -size 669548 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index fd296c87ec..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:890b9a51f18ddac1e911d412219b5891431968dcfdf29e90113cd3f3fd936b79 -size 748732 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 5a82fb397e..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:72736630217d11e797062f98f30b17b5e0e8ea677ca3eee0623ca427f1fb7071 -size 659332 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 2dd2f59529..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dd60be3ee53750b35e8c6c47a0f23ec164b2a2de740119c958f5b12fc6876e84 -size 771646 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 6deab8358c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e6acc402b18aa2168f62ba0120beabd48c6eea1df16dc4591b77f1ac000c43f7 -size 680716 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index bf97be3599..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:98e0b2f902e97609ff03d4f4aac61f295c2b1ff842df9687ea57cf3b409ac712 -size 741578 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index ecfbd51076..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:958352e240a370cba2c9a1c497bfb7bc148a0a4e14e164cf5c891bd2284583dc -size 652178 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 3e702b23ec..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ac4018858cebeecd4a5da028f5fe6843896ebdd70a25a66179159c4e86b0765b -size 764542 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 4678b4e941..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2963f06123c37fcc319cbad0a31e92ee95f7b9ebf8163d08494cfc3d829d945a -size 673564 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index f038c33daf..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e938c88ddf4529900a908c88d5545b461c38b64e1f2ad338cc4ba32513639cb6 -size 746608 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 6c3d1c9e2b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e4de280fb09d7c95ec5295551a5cda48fd0e566dec8fae2cd55a548baa3d44a8 -size 659970 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index e912f9db8d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b25c5fac291b05c2530c072df7f9f459f924b48c5b9b8f0f772a9491f34a6ecd -size 767944 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 4db11076c6..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f9d7e85824f5120f0a0d5d1ab49ecad867e1534f6c0c9e3e0361f26bb14c6501 -size 681406 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index e674174c63..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:585506b5274fbf6911491d47b177f0462b54bac8d91d48130d40254fda997fa4 -size 739356 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 23281e9e26..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9673b3d583fe4588c0ed216df6a490b764d15ad740a76178ad6d549ae88bc0e9 -size 652818 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 6dd7beff6a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:69c641179ca657473602d0a91aa91acf4eb839f4546a687d6a49b911d57b8df5 -size 760890 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 539ad4294b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9432d2ed6ce3ee907e268d6cc84bbc5a3b9b12951782198e42af7c803fff59e6 -size 674252 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index c70af94f9c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:48cb7d7fd907a414530d8170fc2f1f5f7574230ba1e5a32de796f48019a201bd -size 816942 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 3994fe986e..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2d5365de6b28227f2173713ad18fe051171bb09d8b79bea9902018040394e3c1 -size 728972 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 250175a79d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:44fb8b212f19636bc869bb35cd7abb30d2be518925bdbad39830871c8e9ead23 -size 841238 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 503588d482..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4217d2d0f29d52c6c008a5a714af988c59f93e4050d40625b37b89ab84573ce8 -size 750308 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 1976a1be35..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9f056cd8d28f5aeba78473b679f7c08c575f0f9766084154649e3c497dec294f -size 810578 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 7bb1e7025b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c5e0efadcc69c176fa8ca4a253e4279c0de9aeab7fe0f0106a6e1063b5da3f9e -size 721820 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index c7670ccc90..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ed620862af435c812e323c5d1fec8ad02014cb348f786101fec5bd26f9b630e0 -size 834084 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 8137c99e82..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f3681612f7b20b7527505e2b6ca4e1408b6610934163bf9243e013264bc7bd10 -size 743156 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 4fd106a010..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:19e03e6359790623901678bba7895a0a741566c09874cbe448fe7cdb48aa18bb -size 636890 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 3218352520..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c497468878dd48cf182eade63db7debe921d659945411dc77cfaa73ab5ea9d34 -size 547835 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index ca9fecb7ba..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6d07a3d833594da7b33e92e77228c758654e7499ee11604ff875ee61651e1f67 -size 637432 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 8db04da4b9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5a970db825bdf189b9d8173c78ebe145faca27230fd7cecec65cdd1187b74147 -size 549559 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 5b0db56e06..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5f278289862667cb89d8bdb4d861d734e9daa27738d00d0bd3704066c968baaa -size 704806 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 877c053005..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3f2b60c08f7c1d69b00926b169de8382712e319d3d49427fde539d4b6789ae31 -size 613529 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 4244bd4a6d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7924fd3a5994193283c601c5ef56971aa88032a37ccafc61de7cbb944dbd8921 -size 725150 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index a423996528..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d10f77208ecb6c3424ad4e9b3045bc385fcca9d6a041c8c4cbb1673048d4dfa5 -size 636540 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index aa43eb70ee..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b612c1bd6cca81d9aa900d79c751010f468de5e1461f27e6e831406503b6f9fe -size 711632 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 4a2a400e2b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8557a34fbf5748833de1fd16dee08aed98199e436c0f7c2b1c92dfa72aaac9fc -size 623022 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index a40eaacb19..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3cf3dfca5dd608a0ba1b58bc16bd7758d4f37792ba5676db9bbe801e16c33308 -size 726578 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index b46a6b6d4f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:702a013de53a60f29d853809ee94340c8b6ac3c7a3729f3a06e4193bd898d69a -size 640434 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 516b44621a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:82c2300a471a3f6e90b55773d47635f8e009a1ab4c081ced85590db44676644c -size 713062 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index e3ec322351..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:57688accd4adcff9c44b2a173ad8ba59acf0689f027d81ad5d1e8432c21940a7 -size 626918 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index cf18259762..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3e90fda3998956ef50a6b2cfc68c07652fcb37fd170f0c039baf931350f2ba57 -size 792276 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index c600b519ff..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0c755adb0ec551f2b12aae27760437e23a0c061b601f5e544937e5fdca2ef80f -size 703912 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index ddf2ce9a9e..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:349f2ee6cc28232e2b3cdba68041f3d6080319df9902c039356d681f3ec45ea4 -size 778660 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 5862e3bc51..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ccccbe2c96454dfc919a5533fbda4dc0c682bbe43c226d5ce9144e6c7f4130bb -size 690394 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index f5144d50db..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:68faeb09faa56b87931fa5fa8b30bf806fb07975d71df9b61d0e2ed21a0c5d66 -size 602305 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 442f966939..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:15df02e743409f95c27217670e71a84fd7059125dd55530e98783f0161574439 -size 520355 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 00f9e37f00..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1becf3fab97be9158a36cfda74b22b5142c0d67b36d6723823233e6bb8a2bb5e -size 627342 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 83c8e33301..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:06c6717f0999a57fc1dea4c5cec4ee7cd7b210c95529c577d75ec9c1416838f4 -size 542727 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index be8ac9e032..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:39c5e13622d301783303176e672a900c25e1f7eedb9a8b3b50a6c8b651878c1c -size 601613 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 94579fdd0a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6523341e7ec79fb8c1ae402512cbea1e7dec5b4a9f973d58e598df8de46f173f -size 520797 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 538b811bc5..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b8fa406f1a1e5f93a8fbdb90da9d7676481f6bfe58c18e952b9db3c3a6418078 -size 625860 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 8eb63a2d2d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:24ac173f412035dd01aca6413ecd9888ddbe74bc932723bb07508b5157501186 -size 543021 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 85dfcf1ce9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bb853244c0f253f4cb3d1adac18da3b180437d6c5a75901e2828479e8f9f3f42 -size 669728 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 2b54f19c42..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:231429ed66623621b5548c169d0a19d99900e576fb36917ba5a138aecbdeda15 -size 585753 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 2a1a41dab7..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:185faf881eb9ade443bb388d984607c91d21ef12c1ab54d582770af2ff65bb64 -size 693826 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 5d9c7d92b0..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:44543cea3838b7e57e14904721fa4ab04b788570baa9a78fcc854acf581507b8 -size 611677 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 7bbdbc0586..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:535158dc19306724eb1ff267c831dc13a294e9f80303630364b2cbe2cf6c7b0d -size 689430 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index f7b62c9cdf..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c2d7ab781937c39b3d33c841f4491aa2340f89fb7c0e65a4c47a9cb0edd80aed -size 600227 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index cdb51bdc98..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3a0c5570e8520cbf2fb91dda50c12d844bc381ea61d59c3d2b5a2a2e55fbf5c1 -size 713282 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index b76242fe2c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:09796f3d6765a7cba3a0d369c0f5a01037a59445eb8702a136768f65daf9386c -size 622402 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 129af3db5c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ee9fc589473186e7fe51ff883025ad3b5f6a15f5ddcd5fd03523c18bc0452529 -size 682278 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 24acc658d9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:032e2db7c14c3ad2cbca83b78e7966fd96b2e2c016d8d4fd06686e8650d01348 -size 593073 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 5f027a2791..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2172b1719ab3cbd61673647ce917d9cf5e84b488fc45c465d20193ee56018238 -size 706130 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 3e441fd840..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9566ef90675ee995d0aefe828e3239a3c3699fd67416c20451de43ab3c3817eb -size 615249 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 38bb3aaa19..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:76d4315d580f4522d878d5894bca9d76dc8ff47ea4f6274ba53532f21bbb5bcb -size 683756 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 8269dd7c9d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:37e843e7b2a3722d4f358d2d7551647666d4a8605427c4b90e036e19c788de74 -size 600965 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index df0d4ea41b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:76f39115ecd8da0450068867d611250e84e208602d7369c834af7809f08a25cf -size 709580 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 5839c8b812..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4184b7f2553421be4cab9665396f128bf5f062c7ec1318463a0ad82e7c63c798 -size 624028 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 9e59bed78d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ba24db9ea69dc4e2a3950c784f3d807105e002c20d76d4f24cfd56d3ae6f63b6 -size 676504 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 6562ea6827..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:11f17e14b80630a9be45073c325bb380cd3c94dd9a929aff074eba455bae389e -size 593861 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 6e80d1f65d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9eab78bdebee04e2b39b69477d6dd618061eee8cdbd1b8b5696fe25beaf91d1e -size 702526 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 6de1403dae..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f70dae547eb72a75df1460e2453ce972cb84acea3f5ff250ebb681760964e442 -size 616875 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index ed409ac034..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9d36b9691795dca8cdb5356d45d2cca10d6b1e9852d4ef8dc2d6a95281e83c01 -size 757642 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 8c696268c4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4b777cec28cf2db29bbcde5be46f7b8788d5117d8954d8e7013ef69cef265ff9 -size 667354 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 091b35b58e..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2bbc0350b8398132b0abf5a49d4d1decc8bdc64eed19f7f309aaf0b726441ec1 -size 782826 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 5a3f7258e9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2ff2d07c9f3e56d8e611af8b97f5786d1c6abbba4f97494d0a117be6d8336dd2 -size 692044 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 98734f41a9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0be24ddaa14c0e44f21ac7361f9ead07aee1363d77818abaf8694e35ac7e9209 -size 750488 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 93aecabba8..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6edb884c0a670eeed84b6cc2ddf0cd4572855820f36e2d80a83a0ebc88384ea7 -size 660200 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 1a89805eb4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:30420e9d9c03d02e95d24b2639dfb9e893dafafa49014456be356957434874af -size 775672 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 27bbea5296..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ec4716ed27360618423ecdec5bd050415393442cbac92e21f7260ee3f2a53a56 -size 684890 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index cead825351..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:544174eafff8c1eb4e0fb859a77f37aaab0e115dff2ac498dd5d3ed5c1a8eed3 -size 644390 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 09462d1399..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3ec28744adfcf9bec058467512befa81c02cd8bde2d8158d4d612f19ec8ff267 -size 561057 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 3d868062be..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:99b68f72872974f9ac8f7a8ee8ff70bf07e5f079b99754894339d132ec7ba918 -size 666958 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 678ca58763..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cd743ce61c375813ea3f5b0c7accb0ad302ed4368534afff77bf7c54c57bc181 -size 582145 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 154161f787..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:45160cdc6d6fceb6e146925a5d05563a9afcf38e93e39682fd1e6e0d99a3b856 -size 642118 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 5005df8d49..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0d35ca426f4ca42a3b8c0e7171954b7ddc359480f8278395973cabd8766362b2 -size 561399 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index a8398ab723..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ccff2c2e6511c8878b01c63139fd8cc0cdf9cad0d4228b909fc6f30fe0a95752 -size 665576 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 9a18d61205..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c094ad385979427c18cdbe6b58f28e4127ba1c67987bca73f0dacbfde0479d4d -size 582933 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 15ad394a8f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eeff196e1a3a3857c3e441af3c38b22f8fc69e3104da528fc7d67e74d2a259d0 -size 711022 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 7a8e696af3..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3d7b921fafe6cd703ebebb16f23199033f5e94388e450971e2e0d20853c9c2f3 -size 628034 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index c76a123956..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a0a7b70d4fdf1a058297738be91bc538b6090e924726c77df7d20d8365d57db1 -size 733394 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 8364db595f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2546e1659d8d6d8a1e7c7d69ce41694f067e0d9cb86047053009e4b731cd0fe3 -size 651886 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index a5610ccfcc..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ee5d0e64a9cd03570bf71ac5ea1a4e96f5db2860b3c8d013040447e66ce106a2 -size 729294 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 5bbc546208..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b3fee9ef44b75f83cf600bea60a58037ebb3a01c403ff7d4910d7ea8e06834b7 -size 642608 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 133e9560b0..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:14e7585263eaa363dcf07eb9c0bacabe3d1e07e23b3858143c07b674c9256b25 -size 751468 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index a3bad9cc5d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:29652916fb16b3d8882336264530a61d67462dc4aabd23735a6812e2310dc1e8 -size 663204 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 3de0e219fb..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2428f7917c025a0abb2645ae582ac45a6c4f45ea1f404519f59009687a14e17e -size 722140 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 63ab93c2ad..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b2653ab4259a59129f508949b482dc4195912a4ff42fa24f2cc839ff0db18b97 -size 635454 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 9755ca81bb..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:538f4fbd4fe02e0d9d0811e1a06c1f531406616feb20c83c2f3f745c84952e6d -size 744316 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index c8cec41583..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c86f4cf8111ad55e9c6db9deb7be17c0feaeb1135fbcdf3c6d37f9b4c7f262d6 -size 656050 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 6e6eeb7795..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6662cf4d4f2c1567c0b7664378f27a3128cf41339b2d98c352f58ea117ed3a94 -size 726282 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 3f923f91db..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f2a8b09499253a1203551b159b749ec35c55233e1202de90596ad90a540d292f -size 642506 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 9868828fc3..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b7062bfb6aa5bace78622b91ee82139cd8b2ac6a2ad7ebbd1d59ce23a4bfe162 -size 748556 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 3cb0e7148d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:680801e5fdced738695fad1226c496bfb71a0108176f491145cb2f8dd091e3cc -size 663152 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 65d1a554a2..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:138826f7172863da8886aa590c616c7f22f47286321bd58dd0921acafe3a50f8 -size 719030 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 64791163a0..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6e30f2ed7de6426d682496e0b323870878a8bfe53bb4a52254fdc0ff513d5827 -size 635354 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 012cfbab77..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:439959c6a215375000dde6014c3b97dfcda7db4ba2046329327f4698f230c8fe -size 740712 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 4907857cdc..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8cdd9b57c411c41b616d9593d7ff4c173dc821dcee0fbd613078dfec5cd053ad -size 655998 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 63b8ecd996..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b5f078efa465b78b3729bee4ed1a37f4dd35a11a3ff31849e7438b0312aa77c1 -size 798294 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 049295e859..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f1f58dbcc70e216e9b9ca628fa969e3bdc330db875fa119f46784d76a43ba15f -size 707908 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 530a2edd11..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cc7f7bf3c06158819d4a7912bbfa3d1a110f65a57961996009c9dec47791c431 -size 821800 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 267bf3596b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:61bdb80a70cf80102176716c0b5cc13d33b5e3eafc33a1e035bec58671c5a2a4 -size 730032 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 0e3fa9bcd1..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3acafde7aef0ffb95a9567d2fd44f2385bd1bd740575b679fed4ca3069f5264d -size 791140 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index feea3ea341..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d6cc996648d116d32ff13d271caad14ca8aacb47601284c2128349107c4dc36b -size 700754 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 0825366af0..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:21a37e8b6cde333681b782c7e9a2e10423eafcb9fe619bc554ff868c380af915 -size 814648 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index d989a1123a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dbb89a146b6ca7dcb37ceb4b69919a40fb68ef808f46c4d31e0b3c5c3693acaf -size 722880 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 976fc36408..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:04a7c5bd5a47b41c74382b0d54bf4e913e25d41bd85316928d83d86b7d4eef24 -size 614443 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index d7fff09a2c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ffb1adcb7936b8849bf3dfabc1340f17f2368f5d496b4aa17c51eaec06650c05 -size 527805 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 3ae7001db7..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b76c94ff040d859bd364da67b3b552b5d24cfd914d9653eb529253b636fb1ab3 -size 615033 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index f7888e4dde..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:147d413d476a0f093db96ca3c8b6e4e1cc0bc4dc874188e4ae5a1e86c3389042 -size 530319 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index dd5b974867..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f100487d66f800f35f06e4ac294cfc33dee0bd999a0607afd4dc6c3b1aa20b34 -size 682408 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index e07e3f5c82..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:25628c74464a81a11383061cc0e8ebe180cdfdc28e460de89ebe0c60a66678a4 -size 594289 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index a5480943eb..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ba244b25af2320e0b9d6d2af5f11b072b1bbd6d22be2373f179cf60541bfd8f3 -size 702752 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index f0859f8aca..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a6b812c3b26531b203db5c46f552b56c685189287a03ddf30c129cf252cba565 -size 617347 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 5d5798eb00..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d9f1acbe6537a0a7a8bfe1aebce35a1cdf8d26a701569e6738677aee5f394bb7 -size 689236 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 58f302e64e..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:79d6923eebd6adb5e4b8a01a102af304e8f178c87601bcf632ea3221d30d2cae -size 603879 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 7c92ce239b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0191c278bd7a2e1d3ee87e6cfcfb73baaba77fdf24f2ebe4226634e4b7e14ca7 -size 704132 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 1769b30ba5..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:771ee7109b63d4e79564687a09bf442f894ea242d20d90fcf6442b1db721672c -size 620504 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 6dd562e0c4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:db1df0a2b4617244d8b81fc9fc95e8d5a7909490ad2e58571cc7add5bef5bd71 -size 690614 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 4249cbd946..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e18ec726230cc4eb39504157179344bcdb129760c163c3b2d374323de99dd6ac -size 607775 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index bf1427d65a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a582d56c334007a66ca2caf67d34ca9f4a8871b22e26592c53d6ae63c28db496 -size 769828 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index f324e12570..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fee644d126c03516fa111909f73014416cd63e713b62b29543513c0407ff8df0 -size 683980 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 616453cdfe..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c5a516a639be7b5be427fb4895f81a4a57705ce2da03d3eba4219d026f9c51e4 -size 756262 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 20c7656f49..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8261b03f863cedd3e219c763041f2c934f5c975d8b6aa676ab8889b45e3bd57b -size 670464 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 3036a85a70..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1f0d2a729cd2ee2d0a5dc31353d42afcd0455149d5be8001d3cfe848ee65a3b1 -size 611235 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 86d84daf12..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5dd22d549bfbc1c6f0d410f3892d86584563b83f04314964222e9b4437c79724 -size 528495 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index c592d7a6e4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c60c70fbd70a8c1ef4018cba6c430dcf4499706facd5b411205433ae0224e8c7 -size 635532 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index af16c6fcf4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3a02034ffbc5b0e7362ce4529de060604607971d932884457a7363e957d719a1 -size 551655 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 45d4fe0c1f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4743d6a05d66048d9ff20d78cbc09a240fe3109563638cda079d8a0ff975a883 -size 609753 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 7134bb29f2..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:751fd063d538eaf216038ebc47bb20d3702f7490025f07342f136dbbcd4e071a -size 528937 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 1fca30e966..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6c078c3f62fd98393558944e167b5ce3e23fce2121d5fea18260f49e557dfa93 -size 634790 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index c5d79b1e8b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8eafaad9d2eebfd5c1d2f573e4e11c429478a9c7d24726e092b97f7bfa5375e6 -size 551949 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index f02afc6b98..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3da985f4eddb8854fdfdf850fa55f96fa6304d36037a6ae4cce18494fce93ada -size 677868 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 5499eea544..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:24c24bf587d37a50302e7abe9c96a89e3655ae4fbe7814db56087a9a5b72d0c5 -size 594683 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index ae1a104ff2..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e47a217d3b436e2ea4c60031cbda117cb333469709a8e171f51ee18b5c690d64 -size 701966 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 28147bbbb2..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1ce4e564ebc0ffc338da172f102fd6fe021610f5eaefaade0e9b6c66a16e535f -size 619818 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index c5d51bebbc..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6dd413a47b9694ae59f79e6976dfe6d5f8982b758e81a3769e6a1035979674df -size 697570 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 6b7d59bc95..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e651f0381de059729850a924cea2ef334730079f9a3f6b0957b3a8953f921e01 -size 609157 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 278712e8f1..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ff6da3e60b11eafdf32e9b979f74730af4b482f0696176107dfea4cb632a3b27 -size 721422 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 6f100080f1..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:609372e84602b9599a8cd8e84ac463fab1406f478836faabbf2dc7c369025e31 -size 631332 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 0154156d20..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:50c55fa91d36a6a4acc064e68c771ec08d5f295264f132b277f5571790e91bf7 -size 690418 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 1ac3229d9b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:da316f384dab0215de32a97778273e041eaec935206bb7079b4e4267571ed734 -size 602003 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index cf0678dae7..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8c56d17a811b6ed7c22ee12344ac2344bf27fa8addef9e20e44a790223921e12 -size 714270 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index cb40fba390..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aa7e928fa7c018917359aa4f7bafa20bde77e483fa954372fe496427653b3837 -size 624228 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index cbe6555d12..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c21757ecd97d6e6adc1a8a4c5e2d70cb48f4004d3c5c788e404ab7f9b30f622c -size 691896 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 9a62c482ca..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7f1fb33815484bd599d744ad3245701285e8c6c3b5a39bc26c7918fed9ea8492 -size 609943 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 707b719409..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5ab61d1ccc6bcf580c19bf21ace9219357d3e36ec6f8d39677900392e36f8393 -size 718510 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index b1c4a3eb6b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a1e954c1c259c4c69acc0d0c7cb37a03e6bb535cc4ebc906a07b8ea4b1e5582e -size 632168 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index d454d88089..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eef91cfdb7088ed52c8a78bf85289b6df38d80b14a72adadc85a7c1c696a0ce0 -size 685432 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index fc386fb682..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9c023e993ce5e85e12e07a1896e681c13015c474f256feb6deac6e57ce368dd5 -size 602791 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 32ee237ff1..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:436cb546865871114056ba72112a781ebb239a75e0983e92d2b5ca9c190c7d33 -size 710666 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 53fb8f520b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:287c9eef170b963e1e2e62766a83ace105687cacc4be41030875c68fdb6c0ded -size 625016 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index c2a140e7c8..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b939d3298ae61ca4673b2a0d9f4cc7128de18dd5c068a21077b85861b740d91 -size 765782 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 1012a69297..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e8e12a4aa539c80306ac1aeab951830ae6524b286392056c9897a7d37fcc390d -size 676282 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index d692dfbfea..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:738637a3b45ea76884d0911dfa25f39f1bea9acdf7525d3c5cd6b57c7bf5449e -size 791754 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 9882f26ba6..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b09fcf2f39d4fdd894c63fee51a78a1ae143982acf4117d910add93a06b2e93f -size 700184 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 54cd69766a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:707c3c4a285faac86bc5cb2427f23d3377e3becbbccf16f115edeaa32ed09bdb -size 758628 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 86f935c74f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dee12e4155e2589c3dfb3af3fa0067c273860d135ff886712d97b72105c9cbe8 -size 669130 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index a5e599c7ce..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7e8b90d156cbc5e6192521534372e7358f9b4f7cd821f6750233f4af26b7b129 -size 784650 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 5c3ccf40de..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f07ae4783dd6983c9190d93eabfac5432f17c29ee9349a2a6903bc78f1bf07b8 -size 693080 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 7049e43c8e..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2d9268de33fa9eb9dc10420b7921498b43bd0ba7e99e13c77b9fae6fa3da1c33 -size 705810 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 2ddc553ff8..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3091a2843b6e19693d4edbbd1fa68dd855504d33600fda5d8fb01e6557781073 -size 618334 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 911b85a0d9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dad2b8469123314cdf11b7a898614d8413e20d6452478a3f5ff6037aac96a65b -size 732474 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index c75b4345cb..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e3048a40279adb2522a3a99f1d568529e59dd83dd833ea0704a395490157a358 -size 644652 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index c6f3fda05b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f23e5ed2f0ad66156d936975200f8b2c4fe3a330c8fff1d0c47392aeaa7a529 -size 703094 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 81d4684ffb..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d19eb459892e576f645d05fb61dde377d1d069570e6881627364cdfcd77c3249 -size 619220 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index e33f9bd436..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3376eac07e0dfa355b2fcd9980746387624d473511b15bbe5ceba09125666110 -size 727390 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 346c152468..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e750ff4b11c154918862dfc7871ee179d02d99613948132046b45fb02a95edff -size 644354 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index f85640e830..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:959c3cd24e4739c554fc20ec61a2f2d600b018e682f464c77854644662e70baf -size 772046 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 5039fb4167..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c94dad98c82544a50bb7db9124d8895773ac0af5baf8ede8ef8dfb9404d7931f -size 685952 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 1264a4c509..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9dcc0ffcd86cdc778e9e65324e157a44ac166340f0a05dd23cbf7367dbdf7678 -size 798958 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 43a258e838..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cad1f2c1941c650f0f8cd6583fef58a494de4cae7b1394c71432e26ad1d38186 -size 713602 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 03ff92df2b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5f0e48bb9fa6960cb902475fbc9ae2d6772b0cd7056dd45bfb17dfbc5b696f6a -size 806796 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 54dc9b459c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:60f39b7cfaf89b9c19d72853075c1c3a32ef1e55f34690cfc7d8ce46bbac7208 -size 712414 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index aec7767fcf..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:03d418b850e8ef416d469c32805ab454193e4cfcc8b2352864e08ead634cdad1 -size 830304 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 0b80245c18..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:021dc867cdbab6b5abadf17ac06d558947974ead248913ba645505da966d8728 -size 735132 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 9e79b6dc83..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:395940a15c9f1711e2f3aebab34a699d198acb3942884157689e16d4b7cbef3f -size 790468 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 64a93b3f7a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d269995560b5205efcc6cb541ba4a6151c49053602772ee53332a060a707c239 -size 695444 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 8f57770374..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c29654896815324be1a7c214a7ea4f8776fddf7742c05e6d42b09499058c15a0 -size 720874 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 24122e2247..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fc2f2b7a5b85ab4106b51b12f63afdfbcf01d0013baae9d89ee8368c103059e5 -size 802502 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index bd1672c18d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:521e68d3cc63c036902bf9172760399278797a5557be6caf2785383787e6af52 -size 713892 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index ea02585d71..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38060492689bbb4b586e16c23d24d74d45d5926dc859e5fc6bf998afb8d7ee27 -size 826010 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 53d1eb4360..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4eb334454285ec82477b0a1a74cfdcf0e92b25e306a8e4e0b8c1dd8d942f01af -size 735672 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 2fd0b9b639..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:755730ee1d3a295f62c076b752d975ff783eee3d2221aad7f4d9fbf5b8bd81c2 -size 785038 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 791622daff..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8b995125ba84314a445456053981efc43171bc77f40a99e6510bed667c1e7e45 -size 697118 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index a740bba6ef..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8b6c6615dd13c625e1c63c40451023c24a3332c6e5d697a5159865eef1fe3d84 -size 809630 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index b99accb2c8..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f219bc2357ce37cf2cd2df34c4fb806d6a4e7f4c8709f3cee7fd73d78042488c -size 721366 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 907ac91162..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9d042f6d584fad3d052a66f33fa7acb93ba76f9b362137f029861da5cf3be979 -size 876192 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 62d1f9b37c..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e3cf2320730aaff2fcfda9c068630e5ca943497d61c07010eb873284c694e999 -size 779934 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 8a9b7753d8..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b6f61cd2dd9a529b620d4d78d2ea774b5378602a4b33785151d55621f885eac8 -size 899944 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index b2e15beef4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7d13c628ce30367d165bd6866c4b67762df51a4fea3a8e552a7d8481380eaa5e -size 802848 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index ca5706ec6a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a159f18b1a4190a773d7907689a432ef151e2b37c1074247bd4271f7e4e7f89e -size 859912 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 85995b9aa2..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1e80bdba3abc6730c5c681b32cefefd6b8ba93f9d5ab8205d2b87fe03e3fb5eb -size 763160 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 72bfd0ddcb..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:161bf959b5857738fb9079f69a8dcd80bc4a3ae61dec0ea70fad50cbc6757a1d -size 885884 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index d269522f9d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:87c16d63029351b6564e1ab26b50e52775d056826f6abf19a4832ca3dcd4aafd -size 788592 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 6826bfbda1..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e4e06a2cfcd955fe94481d6e698400972c0a6d418483555099a719b795a73f85 -size 642218 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index f712b0d290..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ea9ecedb0f6b18f947c24d72b6470594e0515b8342098df83515dc9e72405294 -size 545023 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 87b6990adf..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f819bfe173a8de0930852964f8d877f9a7b66d3fd58ec8e4e20b133d645c394c -size 640638 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 343d6137f9..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:33087f7f378ea97abbe45ae227894bfc9e4dc8492fa88c54cd34e53b0c25ef17 -size 709394 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 8b5172b585..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8954f8a25afe7526837a8123636c07f9a91c25c651a75e2bb44259d51f811562 -size 613333 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index d29ce79fe1..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0b5bac48e88cc48cb94d25dbc89c3a4bfb6bec70efc53f1de5f6527dca67e374 -size 759338 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index d726e075b5..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:33ec30ec4c739be4e19a5df5d3b698c389b61721300686039a46974a2fe22b87 -size 653608 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 5a60fc2185..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:62b14ec14428e047218b8474375d22cbd7f5ac0100c903e8dbe069b64375c106 -size 731564 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 7d0e432914..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e37fbfd700f0f17657073955524e512851330abbe8bb6499673d1aad6820fbb8 -size 626624 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index ecc3c24a55..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:75a737d1e1609e509ae0ae8553438e1434640d897c855dafd1581e4131264c9e -size 753466 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 9f5ae182cc..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4f67a0c606807e969d8a211a1e9a547b80eeab8a244185d78321bef9b2b8b270 -size 671466 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 0772c0d515..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b4ba043361e7c1bcc11bcdfaee8c7a700029af0c8aecb1f23c836f3f2166fa88 -size 725690 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 87223e245a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a013bb1b04913a66c54e9eed30a20dd2007ced1ca689329060e24998907b5c07 -size 643690 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 6e7083c9d5..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dd0ba22d001abb0ff82d90dff45a0aca601fadd96f3a684a381d602b3c1e53c2 -size 824342 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index a91e62bc7f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:872bf72d6b27cf796efc6212be5bb750d559e36ec051f0948a874f92902cd223 -size 721770 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 10595be75b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:60fd15698f75fe3ee0841c12ef99b0453b7eb67b319746eaced3d7a5c82697b1 -size 796568 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 026325f7f4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f7adebc2cc9e4150ca8e6b6ee9b80071c5fe92f2fe560b12cd985181164a2507 -size 694784 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index ad0942d2ed..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b42fed32581a211bc6851b9e89b046ee28c3b485328aac985d81cd26760cd476 -size 643598 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 46afceaccf..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fbf346692867ea9f3d1bfef21ab10a6db65aeb392a90df5d156c00a1f47c93c5 -size 559871 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 027e30cf1d..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:061260c7e4481f0f9eb426f8343ca8d5fa642e03da2f57eb68020d705fd8f995 -size 687134 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 9c744ef16a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:da7713c0487a5a57a5eb651cfdc6d9a5cb8470da7c383bc3c0d45d84b4dddd2e -size 603555 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index b8ec29e047..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7aa2e16ded9f93b9428f7e4d1d440e605c0be00f05c842a031db5bf75c856e06 -size 641030 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index e7142fe074..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3c8a01f031c8ee6f2e080eb1d051d9be13cbdff92b3598729a3a815dbf283f6e -size 559573 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 280d74f184..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c33ed9690c2132de6e8275257676660953b1de6a7e4c1ce996a87e692ca2baf4 -size 686046 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 912a298310..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:332808506d0b6f3d1bd9d4988892dfe658c8f93de52a3a5c5f40d4731117381e -size 602171 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 59931f30c4..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5eb2d62f00f4acadcdb345a4130387cb9e8a6485d902e3dedeac65443ed42830 -size 709884 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 09282ee7f1..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b0ceeb5ce5874c405f88c6ba64b32bb37478a6e090e36f5b2bf387bf2a8faef6 -size 627984 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 86c1dc45fe..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:00427e0a6615658a9a01a2f39db0b514187c007f073ccd4ed266c11977a97b47 -size 754408 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 813921f3f5..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:528049cfe51fc8c688989aa4a656ed550319e2ae55017a1810de18b6c934a3e8 -size 672506 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 28e56dfc77..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:475cdf6924088be0943868db508af64c7fe53fb2edbe34156ea8693f04f389f5 -size 734274 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 766ccb84b5..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6532f64638099e1e99dc85b2e9fda9532198b6831b372c9eb6bcc4e3c8f6ae1a -size 639892 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 1b6d2d0834..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:24368800abc0bfef173e22a905f713f936d7b0b557653069c7ce74c857bc73e6 -size 772630 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index d54b60c862..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e44b305a2f0d4dd49a779b557cad0430e948272187f2b1bea6eab4c43a0a269a -size 683922 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 26f464bc36..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:59d91f5fbd4c04d86683c79d359c615fd92cc6575d8aafbf47fa2d7e607703b9 -size 727122 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 2562cd40f5..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:11432e328abfade1d0f623d3c0f8fc950805600266dd26f6e2e54d359de33249 -size 632738 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index a78c5c9ca2..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ff804cbd6d2421a852cce91121362aa0287bed24135224cf628479ed194f3121 -size 765478 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index d12ce6510a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cd9abbf889b792cad6ae5b5af4c78e610034ae487d4b1202910387668a1b757e -size 676768 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index b2b4de5670..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:015c3256094341dbb532479e6af025ed0708f0dd73c75313ad8cee770348e248 -size 723716 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 154ddd897e..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e95a416ebe8cdd5156a14459b30c7bab96cc36537daa01ecd4558b6aa763576a -size 640680 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index fb963627c1..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a7252ebc37307091a3d895e70f35c5009fb3dbe1a1a7c4ff077473189d79c56a -size 768436 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index f65b75c782..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8ae59b4ce5982526edb2bbbd76fdeabade68d0f4c69d92eee34f2b679f24769f -size 682686 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 1e23e606e0..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:01cf4e000a747e61409dbe2a7d9e340b1f37a60090ed0457c1a2d7c4f521af1a -size 716660 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 81888da11a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:efa783fea82483ce69cad9b33b7cee947108d72350b4ccd6e6053fa1ff80f4a9 -size 633526 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 3f022eb40f..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4e4a1369c711fca90f3cae6003f23c6b4d67ab835a2a68f159f07c7bc551cf3f -size 761380 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 911c9541af..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:25cd112511956d6a0d68280bb19125382e8b82bd3f7b96994a3eda3be0a4647e -size 675532 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 94e8eea781..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b3b0d18b8ef8f8564cc058d942ba9328ba6204b9d9e852571ab0286291a83ffa -size 803226 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 3167c6a1c7..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:146c3e2fb3f8a5ded3031647c9e6d15489020b34b9ff7adea549c6b2118417ff -size 709582 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 25d7dfb59a..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:379bf0bd87059f66a9e8f968d1a113cb94f4b1dc7e833d93050500e2634276ec -size 841680 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 0de7f00b44..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e131fdd9b174d7c15a58afb4f91ccb4b372aacffa557b62b9b15fdff12fad930 -size 754056 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index 4b96ccef25..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0fdcfe6692a69d024129443b27a1175b544c8a1452988ae2e3a98a807414e7f8 -size 796072 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index 8b2d7a86fb..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:70f15004d25472c634ac59eb00d75b366b40ccbf207d32b9064d5eeb5950d270 -size 702478 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp deleted file mode 100644 index f9a2787701..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7f1a347bd6534685b4d397a783d6626bc75770166090f448f9f228adca0ea6da -size 834526 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp deleted file mode 100644 index fe1df3790b..0000000000 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8d8dc5ac69a4ef4bf4be1379f31b0499ab075835c1138f11f1ebf356d8f58c73 -size 746902 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..455518dadd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95c64f35d1f2a7aae1f7d2f91a2753ca282c21aad2b912d6546d2f2d17f09ee0 +size 698722 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..685a777236 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91695a51f24ba72eb58509fb716bdbefaacb34684b75607362fb3f54d05bfe62 +size 611293 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..14c48dc3c4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f91512436d1baeefb09f24342154aae3f38161c5570bd7ef671500444ce608c +size 725434 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..9dc218872e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e303479ab2c6d579006dc92ff09db39944e9f9d552cd0f4819caefbabe4c678 +size 637564 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..bf3144c5df --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b61b08d84f9c88c00cefe477c74377f67687a40fdf279e67e0fc422eafc255ed +size 696056 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4d412b34f7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73a7a3ca3b154257a224a89c65710b686ea4cdf0f4791367b5ddea52ebea0467 +size 612131 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..41101b47b7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72f8260e617222c2d45abf62014e9f68685cef547b5acccd883ebb2e38f1c185 +size 721092 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1a77aa542a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47aeeb4434826a93ff536b48dd8056e9e003f531c91ead166d5eb28f23eb91e9 +size 637266 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..bad35cc179 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6febc10eda499aa78944853207098958e83453b27451a7ee5a6c9c396c5e20c0 +size 765798 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..34a8bad712 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84816e3f414d78d75a340bed58cb0e05a27c3e816893d03d038c817680fcd673 +size 678864 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..264320c7f2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52083c4f75c0aea0c1351bb4f8b3b725812de86e02f157d52e3dd891bd555c2b +size 791870 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b8171e267e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e974bdb4c377ae521e21ac7ce18294f8adc74e7c671bcf9abafbb12b0bcaa5db +size 706514 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..1ea5a87126 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ce1842655f6b5b826ae6f6ed735022b1e37d43c88da4cd0be284859fba1c1f0 +size 799758 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..195de27fc3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6fa2f8cb37a364fc8effbf84e27f8959ffad7e46fd56aa5b61c99616627af27 +size 705376 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..76245b2147 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18e73c9eb2bb9e0e85ee55b8a988098e1dff80e5c0275c8afc98072fbecb0277 +size 823264 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..8fe8011f9d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8517d823fd94eae52768adb58eed1355f7fb06974cc6d303cedf9a449f8dca8b +size 728092 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..2539c2e21d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:585a905bf7e5a83d07cc1f1fdad41b0dc0ee58c56e9272155a78492ffd8b275b +size 783380 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1bc06d88f5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05016243e5d4ae07062dcc2ff167c35720c5a7cc17699eb09be3533c46738ab7 +size 688404 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..44b9c7108e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e9d1f1731baeb5899a7a166f79be8f0c8ef1f1cd64f9eebeb058a601cc324fe +size 809156 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..66b3b73d2e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:494cc6b811168c933a8a9d7ce75790f5853daa0a02410eece162b6806d6326ec +size 713786 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 01003b5401..3d72a46d94 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:98bfbd2e3ee3779f479a18ae32c744fd64013f9a27466ed1839cf3152be66dc1 -size 785286 +oid sha256:2313c196839960c116c7b057dc550545e81fca6410fe9a33499929fb4d058cc2 +size 785384 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 28d7d470c6..138036b929 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:af09e72fc9a31500a3f2372c3473b89bc5405250fd91c3dd19531c7e3a6b2ff0 -size 823000 +oid sha256:f370d942bd9cf8025d4e0cca17d4663fb1e7d9360f683c1e990cf78fde0786ef +size 823888 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index a1d0a2af7e..8b9a9368f1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1e4408b0389b056a523e5d1ad57a3d1a3d7d73d6a31db677f354ed4dab2a6ea0 -size 786020 +oid sha256:9edf1d07bc7d1780cb03c8a64db3762f55e3a721c3dde860cf526510fd8d5bae +size 786118 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 0b7299ec8e..a051af50fc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b9eaddd7f89bf7c4466c9fe90ecf5d791f83def82ad896edbc136c889f19680 -size 824524 +oid sha256:dc2875aeb2154f44f71c55510169fbf9ed9dc19880cacab9ad122156722c92db +size 824622 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 2e65fc1e4d..ba1f2f70f2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:72e1c698a17dc9299232a0c4e1f0e2f5ef231f3aecbf2c5972e4e537753954ec -size 762080 +oid sha256:83f094d7a7ea359626b81de05f7a8fa84be60d7395096b30cb8aba2b3d5b438c +size 762178 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 8bd22700c8..eb7580eb49 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e56ecd0b3a7326a1beab5ea75efd8ee202f8c7de788737e6d24271ef245999aa -size 672876 +oid sha256:30bd6ad45ed541885f2d59acb154b877911e8cdb67a2c463d9d953a222a254e3 +size 672976 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 7d60bac16e..ca4cc012c1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0b2a7e3073d7f52ccb738a9f72c8a5180b7b47f6bf9e4c0a427845f50df98c10 -size 801472 +oid sha256:d31629a2c6a2956206c7291f0a9be5c1defec9767719e9ff33d8325554d0e8ee +size 801570 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 19a49569ec..814bdb1c49 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a7686b9ac409ff2a2014033c20a27d35ef50edffbc7d67c55edd5063c554b7c3 -size 711430 +oid sha256:26caa80e122ba15de3d97100484a3ca59183814372b04373167b3ca368850fa7 +size 711528 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 1121029c14..6e723d0273 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5913a4dfe051271351642afcf76b64b5a05021a60aaf9dcca80eb8b4f74599ee -size 771028 +oid sha256:69400093f0bd837413d97b02a628646d93460213f9dfefb207cc6ffe5f9f09a4 +size 771128 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 8896f321c4..c2d4cf2f2f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e9e7b75147ede277fe4e71c528c3f4520231d8c7c21a07d777e23f1f8f303b7e -size 809484 +oid sha256:853d27a487248affa01f6c388d4cc0150a05b4520eddbb8f520f7b1f698774a3 +size 809582 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 7165c82468..604ec3eba3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2a58210d080f93e1f38c76c8a59e447942bd4902599b558fe62e5b42ae672460 -size 771714 +oid sha256:2ed04b6a6c3949bcf6005334c4cd2bc23a0f6cf8ad1f6cfbd142424d95f7b11f +size 771862 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index da07c99b99..3a6c908d4d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:198c48ca0df5a4a2226f7bcd8da415c337591594b172718594e5ac22082c4bb6 -size 810266 +oid sha256:bc0b7a1e3e340079b5d53ba5c985bd6fa0efa23fd4d42b2a8392146acfcbb4a3 +size 810366 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index b5cde216c8..48d7a4967d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f32fa54f85198d9fedc0ddbb35138e4ebd603dae63c8077a5edab4d7ee2d946f -size 741556 +oid sha256:406e8a677e743f24ed7f85379b9f8612de720a8b66dd21eb71b9bf97c78ccd0c +size 742444 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 987d853821..bc3b21a287 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:724cfbf68e1e526c62ab9c8774e44f930352ce2b2d7704a335ecb6a83c621210 -size 658570 +oid sha256:b8973ec2e87057be1bda2838e3f520bb3d0ac7a46a53eeb876efbe6b0539ba16 +size 659458 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index c1c527155b..0f90079a7f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6158592fc32fde934163d16902a2347599d5eaa28154b674f71f7b21f4341036 -size 784896 +oid sha256:9b2934e1b10faf415fe3a9f28e045eadae0bbf3696c4ff680854c06829260e96 +size 784994 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index ac9b5cfb9c..d5e80299f8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0cb60870832cb240fe8c10566359667ddfd62c8f7634ca3dd1a55f75eef2e538 -size 697124 +oid sha256:391ac6b21073c8c47a0bc17b6565450f3bf925d03da1cad462ff5984505c0c89 +size 697222 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index fe550ff427..557165ca62 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1d6cf5f2caf276bf872dcfc6e973958c018ee7b3cb2e3f5f40e99ea3ff721086 -size 665748 +oid sha256:24df9ce2ce1ec3bdcb4f6737062ab62e8e827897769057f9c5ec618d6d8f52dd +size 665846 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 812ba2e4de..6c615e1183 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8cfd2b36b7f31f06ac8368be62af8d01d85866895d2f29e481416bdeaa163de2 -size 622086 +oid sha256:d4f2bbefe52912b8a8620399af69eee481baa78508fea025352ce628b953df04 +size 622184 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index d4ccd7935c..4afdb4a403 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7bff8b90c8a50f417e7e0d94d4671ad7a693045d3e2fb183d4fabfb5b83b492b -size 566315 +oid sha256:028d56f14339aafbc77d239db10655477e77434ce21d97afdbb6f0696e26ccdc +size 566413 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 133edd910f..b2e250eab2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e106c4237f03926f5fb29d711c67150772bada9d20cd4374694418030e16fada -size 528277 +oid sha256:5e01321689ce1af67b7047775ce71811d20d9154fa3d4ac91f42adaf6ad17be1 +size 528375 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 45e7b76da7..e1963a6ae5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d8941bc6c8c8fe770a093a8660204a6556ad88a7e5c7ea37b4d62cbab5979fcd -size 653902 +oid sha256:e922f45c67c93dafd0ba27cedcad0cfaecd32b7f962832b44954a40223a9f26a +size 654000 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 552293e5d6..28166231c0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:da1a1363f37e87aba00c550a8eb54299e73a7c416b85bc5d6fe2d60bcc3a4f3f -size 616651 +oid sha256:4baafc2524d54dc0b11b9e564b88ef03bc36f737a94d5dd62c1d45f9fc27b9ea +size 616751 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index e9b5e3bc76..300ec1a891 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90347e03cac125000e4c6d05049b72b317bf4d38002a8c660f5e007c4c6ac8a0 -size 553037 +oid sha256:f5c582cc68b0f1d07344291659570b57001d41fb036ca48886a351ed68602304 +size 553925 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8d90160c08..a1c7ec5e55 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:478d1539db9df10bbfebdc57879b37188926b29a121f056c51d7006180c48a7d -size 519391 +oid sha256:0949112ed92385b09071e6b54a42cdac7ec5e2d828791515ed8eeae391524e46 +size 519489 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a0f6a048ad --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21d25ccffd2688f5a131ef04bf3237110ef96e246b0e577a7d829717a18866ae +size 795464 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a2a19be961 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66b86614a58b0f50bccc74c71cc4702fc0b27f637d898fa29fcf7c122f2e218d +size 707642 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 13845641fc..0e732b85ae 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c4612eeb66975e34f5be706cef176fd53732b532ab7a047149f560569a866c2c -size 659512 +oid sha256:1285d178a642c46f17d1aad893a44824528990e5090816aa15309e2d032c2b6b +size 659610 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0c1d13aa14..aa909e95d3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28074d7a34b2e62039a4b0132530113893d6e87ce64c5626309f9f1c622c06fe -size 566263 +oid sha256:dec543231e75fd7b3dcb4447a14a08cfdeeaa0ba6e15ef4e40ae058a5932fb15 +size 566361 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 34d5254398..9c644694f4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6b1fd20a17f1df3ac463c57c367a06ed2118674ea60a0e6adf356bd65b30e3aa -size 612641 +oid sha256:327c4bab259f0f35b789b726885ad89639ffac664d088f07e71250a5eca73c8b +size 613529 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 031614cf2b..8568ab8eef 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:270ff3fc86d207a22f16988611a80f0b1aae333a23984295f701e56742035025 -size 532023 +oid sha256:a927d2ab68caf22d2bcf4bab0e7dd86c97af6c4e3db8b3deeeb5e9db245bc992 +size 532121 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6079ac7e40 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5198b18cfb0024d2c48d9e41f5e8038f4f0125a404acb807ac14de4a3abfc840 +size 818970 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp similarity index 81% rename from cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp rename to cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index f7e3f6b37c..c47c2f346d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb7a8f33a7213c0a08005876c26739fb6eb53d5ec7c3f960016947e736b91d46 -size 816244 +oid sha256:59138742b739eb42dcf5c2ab2192603316c037a8d9015a9109b164a6abe2ac10 +size 728584 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index b97feb8462..d514265919 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:421aff37ffcb2ab92003f6f1ea7adaaa9dbe335e03a10bf865302452fbb16f9b -size 571573 +oid sha256:4b5eb145563700952dd9a8033666309bf08a7677e339fc30feda75d2c3351b98 +size 571671 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 12a1221bd1..c23f219c6c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:11c4b81d582f6fdb85a637e919a7dd4731a68dd542f036dca34ce7429b8b76c2 -size 470135 +oid sha256:9f8000c403e796788506e85af2682be683cdd7f2cfb17c2ed741e80a4f5874c0 +size 471023 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index aed8eff1f1..836b0d6b1e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ca848bae7458fe83bc1b048cc05414e92ed79ee8d4a59173587102e002620e07 -size 540837 +oid sha256:a25d44b9bdfa95828fb705e3969b67b33a79df5433528cc268c315aecfe97638 +size 540935 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index af17b99cde..2cbf7dbce8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f81aedaac0011dec274670ab05c88ea8f8f6ef5fe6cafbfa169b5abbdb58627b -size 442359 +oid sha256:072681f46200df3be9fe0abe2e68c82cb79d84c363af77d63f669440ac4ec242 +size 442457 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b8c89ffe7f..0a187ed985 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ea52367f005d4ca9af332cdd7dda4cc016f379695f920c20fcfdebfae054e7c -size 651490 +oid sha256:1167ffd3c265fe402647a24fa5a0a43a54bb9a0f30d2f328ef7402a67d0fe015 +size 651588 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 127f66d597..80bb1663d2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:406951337055373737047032fc6b8e337390cf9282042beb3bf1fc79fce360c7 -size 608567 +oid sha256:a1adcb5fd3dd3e3099c51af0c012ec26adafecc6745ed9b10a34990d1c351020 +size 608665 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 335dee48c4..3eaeab4296 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0b75235070409511ddacd2fdf72394b2311dd2e19ec11d1df7083c9472a7f86 -size 559211 +oid sha256:93f6f7712243395692b95e114429e8b35abd09280084ce7373efa701ce770868 +size 559309 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 409fdf42d9..b716c9bd84 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:61c88be9b93aea5f939b2f083bfe13121ad1159eab6669217ee2d4a88a78c13a -size 521123 +oid sha256:153ad067860e464c10e77df515ab46a98dd39c66ddc9236a9222f4d5f32894a7 +size 521221 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c1d1dbc7eb..f1f7c64ac1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6c14754ba1ec28ba1eff756164ac0d5c587ec6631dd13d626105c1b18762c240 -size 639594 +oid sha256:6bf8831f0b89f644ca251106fb51528243873078f7028dff64694c3aaa92a564 +size 639694 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6e5dd065f1..0817c0c7a1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f33ddf3be102e18200a4b50776f7110ce0b2aaccb63749e181d69e328596fab6 -size 602345 +oid sha256:c8b5a07cdb567b3ae235569e2172b9b760d4ea149d04db7e52b31947067975a1 +size 602443 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9f0c38c478..f5015b4ff4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef054b52a9ca384c3858c34019ab026b5859e747d476bd902f5a0574014b44e1 -size 546673 +oid sha256:d95b5f2ba8ad10d0930986c436434df56c943b03b09ddbc23b0007234184088a +size 546821 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9c7b7fc180..94ebd860bd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:00883b7132c7f852274cb5226e7a366abe7f75846797fdd0a537fec2b014e41d -size 512237 +oid sha256:a1e06bde314b2493ff5c987da6998d08d9d6e43c86b80b34d39f71b6598b2dad +size 512335 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6b60a96266 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:734b0fedfd713afe94199a949add231a0e06bddf825ba68756ce7babbf24fef8 +size 778000 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0458efb90f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6cce6313670e9e9905f3d26c7bd6b4535eb57702ac8ea0f1b7441f1ecc69ae3 +size 690080 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 610e064e79..380d99d133 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9261db4c215629fb21fd4a6035d78ada22829b5b34dde193b65048cf2dddc6b1 -size 643626 +oid sha256:2b6541e13cf98deecdcd746c27b43ec70634c392f89fd255d845dd7c7125c2ce +size 643724 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1654d889eb..666d491b2e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:837e82dd366e37835fda543e869727d07bfb62212e69427adfaa8f16036a4860 -size 551955 +oid sha256:996250777780f00d98eff44aee322fdb9f088ba7f54445ab0aab6c942f9dfd5f +size 552055 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 18487dc316..1a6b9dd89c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:471fb93ab79b6a5f93345fd6e92bf2c6fb6a6ef636af9c3cc387ea927da55684 -size 598237 +oid sha256:f3349461ecbd301d1bdb6ee9d5e81ba0c2bc39b59f4de528b1251a8720d6cd5f +size 598335 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 40f8ae420d..b3b37ce4bd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e535d778e93046f6da8c44b7c7b58a8e27663d7a857c7416739dfa111ad99e07 -size 517717 +oid sha256:94e05466343099612baa560c60207fbede006d7c52dfba864300c6bebf40663d +size 517815 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..21e3755ada --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e16e5ef3c5a4926ae227417dd34a5a09cebf6be1124ef224d8b15ca27ae2b7cc +size 802592 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4938dfad5c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e05e11227fa5b4ae6510b829ff78ea46e1fda206aaa3b09b594d3d6258bd82e0 +size 714326 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index cdb64b68c8..5db7b12ab8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:60730cb0f9fe72859fb04856a1c2c9e859975d04b0a5cf9f8737befbf1ed6a03 -size 563433 +oid sha256:8d9d258ef50c4d3fcfa99ba505c5ec3f37001c715629ede22410b0d195202352 +size 564321 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 85c431aeb3..882f0dea78 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32894f1c2ed32942000f3539ea1a8065f0c647c2649a0a14457c27bd5810706d -size 463821 +oid sha256:b36831c8797abc3027ec770f6879407a6a4b53aa64316354ddd9a9f885259b29 +size 463919 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 04ba085840..bb01fef680 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:26cf18f400f5299aadecb964eed7385fe406164c885974202f9edb3341c7a99a -size 533683 +oid sha256:a9d95e6e06297c971d0b19cfabc35aaf4bec338ecbb5c98b2a3d377421684ae9 +size 533781 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9c28626db4..fb8b1835ae 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e2f40d205d529de8a3ce4094157dd8f1294c9f763fc8f909aba4f38e8defdb3 -size 435205 +oid sha256:afb1d79cea88510057b6b35fdd70eb85b466a639daa96a6cf64fcf3ad3ef484e +size 435305 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ee82de6095..420de88d14 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:779ed169ad536937596e5fdd0e2dbcc81e95ae1783f09cc9aeb6af5b265d65d6 -size 685564 +oid sha256:e25605ac7a45799811ab3b5ed5bfb531241a07adc8c65aaf51910d244eb5bc27 +size 685662 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b9674daffa..ad1d0333c4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4773ac5e287f78acd4c56963d04e24c3b6c5e11684d0a0c766cac49be68a4544 -size 628040 +oid sha256:40a86a91e46816ed06ccfe047e06c9b71f35e3d9b5a84c9dae089767e86c2399 +size 628928 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 891ead85a2..37ad92c59e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:91a33cfb1c4c94795dfb6550348d38d9ffe9de4207591dc33bc86630febf0582 -size 587809 +oid sha256:c63edd35240da1617ca6ebcf69e78923f6da192eb11f8a0a874602aed63bff3a +size 587907 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3d6565deae..29698ea6b2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d713e0e714f1187416bf1afe40c814fa5640c7dec8c2d9d846743b3f9cbeb356 -size 548291 +oid sha256:b64c931d04116c449b175e0459984752584dc9543cc05e538ed7475662ffc2bc +size 548389 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index bc62b90016..bc6f03b804 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd13e133733748ecee5f566853efef88217ecf3d86a58c1dc1d2189df02cc7b1 -size 674458 +oid sha256:8fb7f3e9d5364c7946c5a33405417fd2a3bd849229067f6e0ef4104f19e6f67c +size 674556 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3455d0488e..ac3077de1c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:18cf090bb4c3552a4765b5168c3de32a9d8b7e0fc3d1658ad14b37b0ae28bb4a -size 635532 +oid sha256:e9b6bcade25a9c30b746818d1b11f26269449540e11a531d4bf488ac9275557d +size 635630 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9cf6acc9af..66d6c663d0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:447f75e3b26d2d2cd517392fa085baee35ac09d7e7f46dfea5e5e3130ea25b3a -size 573743 +oid sha256:6bbc99ce91f63dbebe66dc59bb9a8bcf3a642b6fd384dd2ead30cb6b3d6504ec +size 574631 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 43700133d9..248a5e3132 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1b1edd882b4c0b7840e44500ebe48e932b048e9616bd7cf8f6aedfd71da02711 -size 540095 +oid sha256:1d84c8e5f1adcc880e8ef09984e188d4a8dc0d59e08342e82e4c9f05e37bcdbd +size 540193 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f646693048 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf851e06d7c085b7bd3593749dea7493e78282a0a3ac42323ed42170fe42a1c5 +size 869104 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..363ae0d3a5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c97ff1bf5761c463955c6d2edf41bdafc7c0349aef1cc1c56cfb6cf47148217 +size 772846 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 6b3f2a5458..5e8aa27803 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:11cdba5f4fbecacb5224754bfac51ee733eafdb91086ad72574ea8176a6e47c3 -size 677306 +oid sha256:67957bab21036c5870ec3669684d06c8f6804af2b67aa03944ab16650d3ac440 +size 677404 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3e5cb1675c..d3871255c4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e43b966d07843aff9a10aba41571c48eb5751438c8757299e48525977b552b82 -size 571477 +oid sha256:85d363614c6f2b411394468f9254a4f60852efd378fa8a59d70a04e81f73d020 +size 571575 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 3cb38b43b5..3a39c6a2f0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e4af3b6aa52fbc7e39be4cebf98f7a11c2a12ca219d67ea142c7899c5abac4de -size 638972 +oid sha256:e1581b8e4a58068495b4cf39fcef51735d79c1dd6346d2586d3475e2223f206c +size 639070 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f029a223c6..1064c7219f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b8040b646fbdc98c36cf014e356e0758d68e2166ebf64887400b8620f83d25ed -size 536595 +oid sha256:47f66e8ce0c1f5dd53f6b0717794005bc745e787f662a1a4810c710ba897f476 +size 536695 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..8d95659f62 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:659f4ebf3a4253cba77e63df48c2ea73b2022b489e196a038c2c83a61cc4fcc6 +size 892906 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..fb83a49d4c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2cdff0a363bfd4b4aa3b8cd8d277bb1de0e40c82a13b2a831affec2431d9773 +size 795810 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 5c1e7f2661..707f9cacc5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5587e63125c621d6dc09f03a4bb1564f2911b64344187ecaef637f3c6bee9543 -size 591833 +oid sha256:3ef4173a642fc01ef26bb98f76b6f8e0122a0a9a6f037c6eadeb5c3274d79892 +size 591933 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 11eaa11db7..400f9ee264 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9188e9234f5b9c8b2e679bad73019f07a5f4cb850b81979afe35c4c438db32d4 -size 489113 +oid sha256:63d79b0688419a9a8c8308725b4efc8ceb23ca810950ed51fb78bdb77193100d +size 489213 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 7f89bc7891..14081775cf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cec143bda70794a12e6a746440c2e97c8f1429c9c44a65b0576f378aee630448 -size 559863 +oid sha256:6a64da276754207313363cab5a3897ed4db5ce471f518963cbc5e6fac8ea07ee +size 559963 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3b2f27703e..8e98af0e34 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:610c5ee950605f24b51cbf6bc05b4a7fe8e7010ea2a1aee63649a49d4aed4a0d -size 459759 +oid sha256:fba3cb6ef22cea48a45f53fb9e8c5d8c2259a8d2bb5ef32e46ef73d5cb7bfacc +size 460647 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 245c153f7e..ff79a89eab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:129839471ed85ce4d815f7b2f69b0145b2f40da3de1dbfdf14f6516517335276 -size 671258 +oid sha256:4a712c59348c962d0952e7bd4013726655dc309c30fcbaac92a9cb8dcd7b3eb4 +size 671356 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 406c8bcb6b..505ac8ff02 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b03633708aa70f120c934807b830abd3e215ab8f0db1acaa3e3b66d3dba2521 -size 614521 +oid sha256:fe18b18349cb817b5fa4e8da4cd7a4e60c32cea117be88854b46d890333bdcda +size 614619 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index add5ef7508..144fdc3881 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15def0a1549661368855d7c08c7defe0eaaf043c2295e9b55e711f5a40b0f27f -size 580705 +oid sha256:2db55c5741eb0f3a3ccffc997d894119feab3dce1706a8184525ecf0c029fae2 +size 580803 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 808f572a26..f0db777d34 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4d0723e315cb1f0ee49fd825e54ad71e46da7591692dcee6fe89b0fea8e64bf8 -size 541137 +oid sha256:3cb115ed204500fe1fa5b44554690e3a6bff5b969323bc68c889a7fc01cb911e +size 542025 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 91ee1c925f..6fdd211330 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a5182f4e7423b9d4b4ba20d13d6145a554266613d927fbf4da3c68be2f2f3a77 -size 660200 +oid sha256:65d34be5c06b2aa6b0736b3dbd972a75295267d5f773b148087da9c64668406e +size 660300 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index cef4abeca7..db8cdbed95 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7b35e692d00f80ed5ad388e37cc99b447c2ebd5c58a2b6b51ed755990ca36ad5 -size 621226 +oid sha256:aef42339ebd189ddc678fda84c33f8be04cb958c089f3b46cd17f2b62bbee8e7 +size 621324 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index acdaf345d8..6f24eb5b9a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1cdb1dc26cfef02dff140525de3583021fe86f9bab77cee100deb858015c7847 -size 567379 +oid sha256:1275ee2f5d801b70f9543919b1ff177bf1e1a255b0bc0b58eca61c9c8c2a4071 +size 567527 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index b8cd330c1c..bcf4af9bda 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:19ce09966a675bef2b8d0efa82e4b87c47ea36033efb6909dbf840cb90cc694f -size 532941 +oid sha256:98d439027be57351c935afb30e7643fa14b1e4e72190c528d937cb138f43d3b0 +size 533829 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..09d96c3c8c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46d7b14c659dbb4db0c37763a1ce46497926103be3f29637660570442bd0f34d +size 852824 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d753065ee6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d4188901758ccfd05c6e2b4aada3dfb35185a791f66af640e6a86864210156d +size 756072 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 7cf4f9e077..8f2f36b27b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d970ad66239d204c5d260377592080e1db425a78c29c60faf972b75c665980b -size 662210 +oid sha256:aec4c44aebaac1ae21c3f55a1579fd59a5b6e89674c604cd8c1d73e4fc48155d +size 662308 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0320240799..12534f10b4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a43e3cf2b6b9087b04516c994335a3bcb39f3c727e8f97a2dc941502bcd47276 -size 557169 +oid sha256:b4d100704074d978014074918a88144eaeffc328a24efbe5d71aa112ce08abef +size 557269 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 0a1b94ebd4..940a57994c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f5f1f71affaeadbbe9778c490c359ea8252d17105df91da28ab9bcfad757ef8 -size 623776 +oid sha256:18f6c1b308cb2574fb7b9f07f4913912a9dd12a518401e519d5ce9226fe96d9e +size 623876 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f6134ce11d..6acd2f0d5f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:38cdf1935d7133a5cd4a38e7d485ede5792376c790b67211a20188f5839e3e2c -size 522289 +oid sha256:7602e79ef97ea0c58347ef1d23251edfd2b1d6b27126e320da934b1b34b45a5f +size 523177 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..48b3a1efba --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:835fd90e1539dbcfff35d72bdb143a0a5886847797a4d59c89d2e64ba0061c70 +size 879586 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..9b4a955cd9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f805b91e68e11830bf604b6d051b0a3ce84a83fdbd97a1429644bc9df9863fb +size 781504 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 643053e6c6..d2064eea8b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7b692de530f866e06ed945fdc49cc46677cdd52e5a84bf9ebb143e232228ba73 -size 584681 +oid sha256:2e4b64e31cf6805229955382f0d17613c1a9fc2ad938e8837e3a5a488ad7fffd +size 584779 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 34dc504a9b..9d59ba92c7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e538c72817c86cf1b2f3d96c76675cab1a21351543ff17f343da9d94003f1a20 -size 481961 +oid sha256:b8b5e078a92991392cd0d67fbc51caae2c95ea1f953a0bb137b280fc3ac1749a +size 482059 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 20167c390d..190c46ec46 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7cad41cdb87053ec191a7d6a14b4fa3155ecc82a2a937d49667bb3c056b8678f -size 552711 +oid sha256:8bf1bf49ce4796ba58f69efbe49543ff59f09b74d2e439840b450011ddeedd34 +size 552809 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4a5cd16ed0..3e43c510dd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7478161e1ffc6cd34340ebea1109d1b55a488a5a753fd3903a7869eaf872a059 -size 453395 +oid sha256:384819b93555f6874250fea13739d1da2588b27d289202781fabc271014f2c82 +size 453493 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..bb2373f106 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61d341ff74a18c24835b55c0cfaace6f1457c8db93edcec9c14fbcca5cfced25 +size 635970 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..548bce43e4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66098f6005821e4dd671cb0828b206146b7bebfab0bc7f537b7c58c37b0df02c +size 537985 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c66c1d5c0f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a618a49e94aeb20eca4590d447bfb5d981061cff883f0cd55981f73698b75bf +size 634388 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..031b080ec9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad842585ce056c76bf1b2a9a9413f66ef1ff2825320705201d3d1e3e3e63fc46 +size 553571 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a0f8160410 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32c84eabbd18fabee05ad4a3b90957bece46aff4226f8452d684ca95df00298a +size 702306 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ded6a93f68 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ee27df79d561e50c1533e7cdf97a803a931c84bac07a17209c07aeb556b9eaa +size 606293 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..73e8ca5b85 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b5effc46b6c1f0c5532270dfb749ceafabf1619af0f9088b94bd305497c633 +size 752300 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4c02a65bd6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30be9f31b36f5e4218bca660b2626015494fe2779ea0e9b44ab742a766c0d20f +size 646570 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..1066babc06 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ef22b4f8e6655c12e1f088ab081ab40e8b6d97e0520519092243e7ed0f613b6 +size 724524 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..8b0485a8c7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffeff749cad388a58c72e7e49dcb0bf9f70d3e4d7bcfc7632ca86d354e31899b +size 620324 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0b86c57d9d..546057b7e0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3461e0dead1c400942e2af745420b8639ff68ecf160e79f10d061a896ccdedfb -size 804720 +oid sha256:b8d217dde016b5c977f48500377b289f01fdc524243eaaf97a42203f24083e1e +size 804818 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2990f0973c..e2735fd868 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:513ca8cd8b5599e7aced5b1a30d580767112590830138be88e195587a048b62a -size 765102 +oid sha256:11e78d18dc1c761303c8bef61f9cbd75f2bfd0153275d8a830ba89c15a6f7fe9 +size 765202 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 85a9d8e77e..0c64c0a6a5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:86eb6a375d344a44307c8edc9cab5a532e28e70879278ff00c47a26d3b6356a9 -size 787150 +oid sha256:86262b5e4be536700b77f4044f21652e749a6791c0883a7cedd4c023321796c5 +size 787250 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ee610072f5..bb5927f064 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a348b7c58556a9adbb6631636c5eb72e105e6009cb84f1a864c663b0442a91c -size 754786 +oid sha256:d10f7025db084eee7feff1abf64c3edaaf901e9451b85478f22188eb8e279af5 +size 754884 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..58ec174c27 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63e8cf1371190e71a3f6ada93154ff67aab673956f0175adbaf0afa94a88cc66 +size 746426 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..08b69aefd0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fa0de97b60236eac4b873d31b263e7e84f12269bc3c7a5955063c67cfd882b1 +size 664426 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 7747ec7f32..641fba4362 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6f789e6e465ca514029e5c2a19705789485fdae978b85936d5b8fc470d6b30a9 -size 775148 +oid sha256:a280a5bd5b95c87f14b7125f0bc790f16345dfa1ddce74610cfe5b12510ef999 +size 775248 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index cd47fe4523..f0502e80fb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7819435bdfdb06b1ebc2bfb9e82e742facf40cd194c199f51da4d41befabd30e -size 700844 +oid sha256:cdfb54c348d3ea516f16920551cb9c278f8c430ec750bd4035527086357b945b +size 701732 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index ab4f484041..6b2cc3c603 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6ea7ce36a717499d40f0540f53d806d7565dc06f9572f18e3f5a572de2c36a37 -size 724038 +oid sha256:e4d8027f6af8f0c80e0191ab97e144ace6b3122a2f58ece29a37aae92f1cdfe7 +size 724926 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ddf6b348a2..cfcd2c7cd2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83f61d5dcefd73d99da7e285104ad5d97d16d1766d4c04970ad61af8082e1926 -size 664632 +oid sha256:ca9eb592aafe7847992ffb61619db8077bb170955cb576d0484027227c41f89a +size 664730 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9cccca59b8..071a5e2339 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5feb0aa0d922efdd68b025fc3a49a26e0f5919bc709e919ca478c574e3789eb2 -size 776944 +oid sha256:0e87b10fe143b1b69dbeed15e44326970836e2ba744885aca54fd186bc016295 +size 777832 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1020b46cf5..24914b97e3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e1217dd475277901eab0132ac204e4ee930bf38435917344d63a0275da32a00e -size 738068 +oid sha256:35599d69f1f6a1162164943d013a631d120c34d56999783a70ae61f103afe1d1 +size 738166 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index bf75c10a8f..022a8dc713 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc095d863057245caceb2bc98e60e7fda9b352143f969f33e80baa43ecaf1166 -size 759326 +oid sha256:95f2dcc49636c49e3862508050ae1091121722a4d6a893f583a648a5232f240d +size 760214 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 06048e0be1..564dcf1731 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ce8a464c9420f389425980fcdf4459a595776314239f8a813e5180d3455b57b -size 727012 +oid sha256:9e95dccb0f52df4d8a51165e45f93a0f9bf6b0879f525ab43945e6b60b7aa5c6 +size 727110 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..8134ffea77 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbf773df675d7782433da79dfd7fd0429d0236f906040608fcb5e4e93282893a +size 718602 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..466fa62c59 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df2b43de899f34af6396e31f5bb8b1138d222f6f8ded4bbd0006aa8e0bd4e400 +size 636652 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index de611066de..a2f9bdb926 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4cd76a76bc0ef0f334b290d1e40f7c30e5177d2cf4fa4c9c7c5163c59498ecfc -size 747374 +oid sha256:f501fa3b6ce18227b60b32bce8f4beb700d4a210b5186051561d56a904071516 +size 747472 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5c247156c7..e5c783e4ec 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a5e725044442e234a5de2d6f129d855e06a35b00483769613136221057aeea9e -size 673860 +oid sha256:d1569b3ec643b2b2a02e33d3c9e0e895c1f4f5d29c6456c682a7914889f19fb3 +size 673958 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 86a656f040..a01e79bf43 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed607d24ad3c4feff4bbf3659b369855964bcfd32cead1837c94b3ebecb78565 -size 696362 +oid sha256:97c67e9be5442a82079a67c69e3f57966713ba8e16fa2e34062719adc3683411 +size 696460 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c21f6b7657..63ca307a15 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c178eb09dfae1d9258caf01de42b7853f07031dfae313e5298b3168be7fadefe -size 637646 +oid sha256:d995b9a91d5c85181800729128ed8ebbd854a5661a90324db51667b0405078af +size 637746 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f53bb5805e..499ca3d472 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:31eaf57b941ad3e98c6afcc792c022a11b160b18e727274bd0927e443c373c91 -size 824388 +oid sha256:822dda0914a813d5b86cc33126ac8e2e91552c272daddaa2eef3ad026efcf970 +size 824486 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b9f68412ce..d7a92f52e9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:54be4cddea8c9e038c28e866bbd802e04a70b70ba07d4044588fe4bd3cee8537 -size 783834 +oid sha256:4e15aae1262e6117f97448873e79b34d8b63dab2deedfdf44e86dceaf4063370 +size 783932 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e0828ea112..f4cbb93f58 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f52def6912926ebb3acccea888931b704f56c61018626b6c284843dafb3fc57c -size 807608 +oid sha256:11f50fc83a413d416ebaa912561d45e6578d83bd9ebb1a902b6ac68e1cebb0b9 +size 807708 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8252b701b7..572774ac10 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9fd8c6e44a2647dd5e95bc2d39dcd569b3286117f07176d084326de88e5e21d5 -size 773566 +oid sha256:a8351ba2c50987ea0fac52d5c84889492a6a17a1c401b8f82a0e3a44f6abcb49 +size 773666 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..487efea918 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85246ae8442a8698fc120095cba99201b612ff8667a42e02d4505699798b42bc +size 817304 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..2b35cf6495 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b2d3fbeb65c0752ef8645544e5ac563162c8371b11fdfeba1e27437291d344 +size 714732 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 61a456ad7b..4827d873e3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b34cb888b00046b8b46bfc928bf5ec9c8927ba3e9bb80848ba4479c0d5ef7ce -size 793534 +oid sha256:5b2fc9d9c6ba2aebbfc9eb349bfe7b4659aba96f4473a5c734d6c9ffb44f7019 +size 794422 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6d4b95df98..27499c5c23 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:deef37dacde75d250be15bc129e9527836cbb7fdf39a472146684532bd530e87 -size 684698 +oid sha256:605893c088e0f42a49e34a3b7a4e1442cabedde39bf453d6589c64971d87474d +size 684796 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 6ab23de869..bb7b13bd4a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:40684beb350cac92a1ef475effc3aab31595085ee4c78bdc0643eb70cf1f1d8b -size 751994 +oid sha256:206a75bdbdf12e75619d28c988dd008d8f2325d5517cdb0c7ca7c7692e9dd70b +size 752882 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a38c48ab03..c094db62f8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:367b8dda414a82172ae1b5db5045cee189ffda4ef4cac5025dbaab5aecad17ca -size 647844 +oid sha256:ed6c56d0b6b2347e91007cc3249f03940d61d47e27e92ee56d71129468505293 +size 647942 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a15803dffa..ec28472a9c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:af7b8f88caa8728a403440fe50ba1dc36b11931c205b2f0dab83abb2f9aeaab9 -size 796614 +oid sha256:003a1356440535798e27cdb103c92e3ca3088d8fdfd9a22ec78ecfa02a2f8ecb +size 796712 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6390a07ee7..999dbb3808 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f9adeb4be70ada34ea0889d281a3ddf29d214471d8252080da26199a1bd7c24 -size 756060 +oid sha256:b3d70f71dcc3142750530433f1f8eaf202fd3a907a24b98becc773ad7620be1d +size 756158 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 64c1c6862b..cdcddbd838 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:611770980ea9b84131cc17aa14e7a1a96fcb18f9986487d7c6796397f96b8fd4 -size 780574 +oid sha256:a923b23702da85497766a27ddeac37760fdd3b1e8a2b3fb159ae1d63a4209044 +size 780672 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index bbfae095a6..82cd955846 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9a6af1348108a93c028cfb936ebc04c4258773e12e4c3c248ea47e58b507aa2c -size 746532 +oid sha256:e8acbc32d2bf616ced53b997ddf329cc3dda4cf070fab808d03dbfa1aaa7e2f6 +size 746630 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..78833aa734 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94381b6a36944e91335ee1e05348ce92580770047a5de52ce8362f2ed86c625a +size 789480 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..108210149b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc744d13846a2e3e84748ee98d4804a31d521360fd0313781fb9f0be8ae1dc55 +size 687746 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index ba2bb8b9fe..047110ebab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:62bc5f70149e7bf354011749dcb48fc96a1d0961329dff6e5f60a16b3c08b688 -size 765514 +oid sha256:f7de61edc8155621a8a39856d5b1436f5e46c574a0dc1619d561490e3e60a655 +size 765612 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8aa554aef7..287f61ae7c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5dc401055b54c6e0741381e0e5b4b0cb76a8997cc198a3296cfdab3503f7dbfc -size 656922 +oid sha256:77b54a32b55c7a9981ce60408a6d886c54bbb672a8b9cd9e8d48535808dc0205 +size 657810 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 8169e78059..7e078da691 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa14ec40eae4d9dc516f734f94279565ced45ed6d6c96f4ff45f566500f92c13 -size 724960 +oid sha256:e8797976271468c81c67d8557eca7f8d77960aecf04275dfb326d41963ca9a79 +size 725058 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 89f8e1c1d8..e81d8ac635 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ecb57187914ed6de69997b8268a7a0531b53dd9ddf3e1f66dbe798a1f3228de -size 620858 +oid sha256:c2687c0ebf37d62dcc2675256916d68d6cdb9d35448d78a368bb6fdf025eb65d +size 620956 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..0f78cc645d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a2f31208ee08c39f2dd17946ac8db8753b0ada98c1e515ef5278b0e9f17af2 +size 633352 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0457520e57 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c34fd1befea8b2c30af8fe02a3e10fb6e3ae925e2099036949fa0311fe74ee6 +size 549625 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a946a034e3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c2b682a81a456af6d1f2552b94f36c91365fd262ef5594f927affd7ba12bf16 +size 668600 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d1070264dd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:713708c824415cee5432d69a7b3452dbf65316f96ff70a17d9c33c669bdd1a6c +size 585021 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..70739a7d23 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:382a50a26d5d34a9452c949e373de5b10368a3ddb888b8a4de0784f405cf936b +size 629996 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..3f4d020869 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aabde8c988889a2e569783c7caf65053358ca2dcaab5fdd0bdebef3a13856e14 +size 549327 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6128f75035 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea9bed541248ace4e44744b632f75299beb95119ea260c16cf7a37b840be3102 +size 665934 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5e3fa34da1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fa397a35c49a59767209804a9545f1f17cab331e5d6ce0547f149e56fe7f25c +size 582849 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..92fcd7a098 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c06b008cc1cb1de93f4e53b411c932b94c2ce1085f2a35887ad979d195bf1a36 +size 699640 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d881a01669 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99f61bd0eb4b1dc7831f74160b299edf86e24ad98da4d213ca9c53f11ad38e8d +size 617737 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..e1671b39b6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae209fe1f20e960186cd41ae3c5be0f1b9ed240980aacf850e3f3b080fd5419 +size 735036 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7adde23e25 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f7820f072accafcb115182ca5717d4688607913e06d4c9775a101715982e779 +size 653184 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b102583d0e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6df2dfde7859dde1a87f9bedc361b915848b6742d7e009f39b42f00a73daa32 +size 723240 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp similarity index 81% rename from cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp rename to cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index b61edb3c34..c88a6dfb8a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9c2081d8422b4dc0d86bb145c68a9af38f24652ac2dda1d21bf1ecb4bd654dea -size 560659 +oid sha256:505f0066b54f2fae8005973ac1cec8d7b0b162441a19cc9f901327418226f58f +size 629646 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..2606ba38c6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f947a84da8d724c3166f5dfec3604b137bb6333efa747ee3917523050bee7ff +size 754098 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e83c74af01 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12544005706467b3983752a3d39bf1244157e570d7b27d6cdc964fd64eb4f97d +size 664598 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a88c713952 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:002fd13f59d06c7d656c92c0ded1534c01c87822edcfe026ae3566a44c39833f +size 716086 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b662ce0033 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dec4cc2585abbd822a00aa945321591b14d4fc8150680241dcc608ac5fc870b +size 622494 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f3552513ea --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aedf7033931083985f5fd1a0ca2d0c0e8456eec5fcd486e9a6eaf0f7fb9f347b +size 746944 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..65cb61e600 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b39d828a0edf55fca409a3b9ce47a8e88a49436bac0714be9e03896254782f8 +size 657446 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 9e36d0519c..364d7f4256 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:379ccf6d20e7837625f954a14c695a8b158373720569092ed9b0949b152297f6 -size 705216 +oid sha256:dcdd39f3f041543e84bc2d44e3334848c41eb9dc14b0359f9b384f45f33125b3 +size 705314 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index cb18184d7d..c11e84f4df 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a7454c8fe27152baa3c4c8b62e423975bd752908ceb6500f04414ae9dcab3281 -size 742142 +oid sha256:c3958467342430d19449acadd225716f87492bb0c4bd94ec68537119ccb03b27 +size 742240 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 2c748ca274..86960874f0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d83b9ca6933c61c17241651d0428b0be1ffd168b778d12ff3baaa9f1375a60f9 -size 691644 +oid sha256:a3560fddc8208de76c732608d11dc4eda9440721cad6deccfa69c2bf8bb8b79d +size 691742 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 1d30ff51c1..8430d73e15 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8d545ea9f43f1a38c7577cfa55331326bfc08c626946d9b8ac71a5024872986a -size 729358 +oid sha256:0dba590ec4eefc09be443ffcfb0dc45df616a3bd439157c129b11b4b2bd1694e +size 729456 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 5187176ccc..5015d94f3b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dbb1ebe187502c0a298ca2a53c6c69caf0f3d1eea0b4097eb1b38f3460a06db6 -size 679542 +oid sha256:db2784eef278c51c73088fd5ab64b5fe52613ff30701110dadee44d22f6a953c +size 679642 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index c0d350b796..808d7e9163 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ce60f5e9261acdd0a6061cb3f2817e68180f6ae2b8c410c37f7bcc6e3e25760 -size 593645 +oid sha256:f9ef7d0a983dc5b5f6ed5736804490a18ce8112dd9aad387d16bfaa0fe3829a3 +size 593743 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 6d78d659da..f344de54cc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:10a3ce7fb721b5008c16b38aa9a3ae2a5c62a80cb5ec655341ef58a6bd213c69 -size 719822 +oid sha256:63b8f7db96619c3af589008fd4078a0329edba25847390ebe07fefd6c2e0b5d9 +size 720710 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 5eb622b92b..01d0f01b6f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e901513bcc763ad1f7be5da7f1af67198c4b7ebfbfccaa2fc83a78c022635c0 -size 631360 +oid sha256:4a54c098af068dd7856be70408696662e5e4159267f1888298591551441466cb +size 632248 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index cf37bf3718..2f9a210234 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d5e8ff2fc16affd13bbc09e1e5ce547033c24328edd9b55b736e770154801b9 -size 698112 +oid sha256:28eaaba225f57308eda54e09cc39c2b86a25b2b310321f68bbbb24d4e29db0f9 +size 698210 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index be65d91514..7109cbba0d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5ff532a41b3b75476795ce22dca19465382fb393cb7f2a4659f7832aa8c38c55 -size 734988 +oid sha256:ffbccf3d65c1da499e40331d8dced7b8c122e1232eb0007be137173ee2db9b87 +size 735876 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 179198a7c0..e728a6eeb4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd7eee986771476d665ab3fe06bacf1c04855e2366685aa343e9d1dd276e6fd0 -size 684490 +oid sha256:bbd07c0f93951eca3748ff9cd779ca0a57149a9543a89c355d9ababa8494d60a +size 684588 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index b0e6492ee1..f7b8b11aeb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6a968a9bcfaa2acafd1491a85b6902bd42fa45d98de2a82c039abde3b21d1e8f -size 722204 +oid sha256:0cad62c7b18f8bc4ce3528afca14d6cc70eb518be0c5c1887c6d524dd1e46a48 +size 722304 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index fc3b30e168..5118d9e852 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:370003847095ff442c497328dcd43bebc2aa74e4fd2e656e342bcfbea4eab9c6 -size 672390 +oid sha256:9575fd90585202ef99663af39d73df0a36f7de0553cddc289ea3feb9f28dea93 +size 673278 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index be86f6b9c4..a47ff41515 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1827736f7ce0928a24262aeefb4de4414b456e42a43561e845a6a833a4d522bf -size 586491 +oid sha256:19ac6ee760ebaee5d3011f1a742797942fa58814859aa046b1cae301b9e19b33 +size 586589 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index e6d949be4b..15247b5dca 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c4aa283a46a8268d0b4fb556d9f9b25683104ab8fe7d5481487234e2c80098a6 -size 713458 +oid sha256:bb342c64bab1cdefcc62565d990da07b208c96e6647b22c1bf25a661e5611b09 +size 713558 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index ea7f4d1d54..cfa9f89d27 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a588d11adfd62c655d16f2c4993390f8f244df81dcf60a183c1506a3b971f624 -size 624206 +oid sha256:cbae3ad4c8090b83f1bead6695cb520437e46782a52b87afbe9a6a6463e45b2f +size 625094 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7e9a08638a..e3b4cc3149 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:49052adaba56b4c031d6b78894ff2bb36f626ac976e0f5a418751335fbbd6d49 -size 621246 +oid sha256:dfcc7a85ac53aed76700bad1127061b9bfce9a5745fc82b8494abfdbb3d6f39b +size 621346 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 20490f68b5..0455237ed5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f66fc122d46a89b4de6f4edbe9f6b9c6236d99212a998a772b2a61c60243616c -size 597119 +oid sha256:bfa88586eba2014e509535bbb283c841410afc381132e50dca7a9a07b89bdd2f +size 597219 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 83283379ca..520293ca2a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6b4d24091fff9df900911cb58916502b26d8c19fec057852544edb448f42d2d4 -size 539425 +oid sha256:e5b701b1c9e0e34359e395ff4f1cd2ef98e0dacc72e7126503425277c099d4f8 +size 539525 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 045366ff5e..4f792d6ffd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bdebff14eebdf7b575e3ee72d92d2c2f9d22166ba2cd3301494b69ca41545353 -size 518111 +oid sha256:db6b814882215fe20a34dfbb2489a2eddd7fd5d7d62f932232457ed27c8313df +size 518211 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6ff0277dff..1ba93eb42c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee60d686fab905d9c503b91ea7924cf3a585cd63d0d757a1427a4134ab2fec3f -size 612211 +oid sha256:a952f9c2851d4014ab39b03a2242a67f7aabce92dcb8792c04729272ca7228e5 +size 613099 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index aad0753b87..fec05dd818 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7b933fa8854241a9c29f4db24043a6cc5ffdc68e03081fcaedc28df3f7be87f4 -size 590799 +oid sha256:2ee239b27eff7d65dd1aab1f5940637d0d2729134b88bb1e527b2e8cde3ba761 +size 590897 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 427c40d073..5142b0e2ba 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:330ecd985017afe31a144513edf55efca148d255d16a503be4cc49c1bf33840e -size 530145 +oid sha256:43ba847ba3f1d9413844ed20208deb0997f3757bd5b179c4d5a46a96341bc41e +size 530243 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 84f55284ed..99d37e54b5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:22ac21aaa8b2a2ca45283fc9906c12c7b54dd37a5b4710bd5d561cdc620a99e4 -size 509473 +oid sha256:b467aa76a0642bc960f7f86f97a9566c70b0143feddd11cd78c5b84a4277f158 +size 509571 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..3520c19eef --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b28e9d1169b5b9057f152b2e433027f5e2aeb16441de5e5572b9253f3f53ad32 +size 713470 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..9b27bab59a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe61dcde4305ff2fabf6396e4894e504d74184dfaf92cca6b48b28f63bb8de94 +size 630434 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 14d4104e05..8e2a0bb587 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eff65b73405dfc1695e86fd1d8c16a6548f3ae312b3ef17e8650138ce6009ac3 -size 615601 +oid sha256:5bcb8789f2aefc57b31dfcb453c9da71fb886b32bc3f33756a550d27202d529e +size 615701 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9dcca47b43..8a614400d7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01449804fad6c5b3371dcfa9b701d40df926fa908614cea4f8ac78234dcb93b0 -size 532813 +oid sha256:43099c3adacf1bc477ccf3808c3b27c84d6f9c9ef73ddf643d8f25d56f3ff280 +size 533701 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index abd42a6e10..d597cca172 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e4d1f9658d491b33b280dcc2cc2b10529fe784125242ef006ee411d26ca7b311 -size 589699 +oid sha256:859f50bb9dc75edf13f5136c7350d6e7745b17699b5b4c9fa2f1d1cf027c8cba +size 589799 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ca6d2d6920..27fac6857c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3a20f0da93b0f2475a64d6a6cee839029373a4761cc3289df8c850831954d708 -size 507157 +oid sha256:aa8fb73e9a9e8b112a224b75fd77c1ad226d2e7b64c8d1d8d1216025eb67d703 +size 507255 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b505f6b719 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1e0e9cc158299ccf238a0f4a328668ae372db5150bc64389adad0d445f67dee +size 749112 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..fe969561e1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7864a18218b9558e1902bf3367a134ed090bc9a8fc8f8bc56832897abf826a4 +size 663314 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 857f4b39d2..fb3c6e1cd6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3de35dc65250c5c5a65f970c885c92d253d172f9777bb500d58eb4aa3b6efec6 -size 555439 +oid sha256:72380f84bbf5cf6a686ba1e34107b4ae89dc1baa62d9d3f28fdafd0cce56fac8 +size 555537 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1427c68482..92762fb803 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:209b2bddf517d57e9234fa54ed8627a43477fe96b6af2ac67bfc8f520357b545 -size 454791 +oid sha256:9090e60d795d3d658873e9525cc36426373a36c47018853c827cad686a18db1e +size 454889 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index a52a024a18..4ddcdafb83 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:526f03ecee273c106785701cec4d765471fc81bf0b4be9ceb852c239e509706a -size 530671 +oid sha256:40910922c0f5a54a903f7a05bb0edaf21ce7c7f1269b33dddf748f5748dbc104 +size 530771 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index db5ebaba0c..f281fae6c0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ae9e6c4c77c3e16f1f6430fd83c929abeeefe03deda5ddab6dcab5ec8c1fcc5 -size 432293 +oid sha256:f83d46d6f99133b9ec5aa83fe54a5504180b26d9eeb64aff4cc55349564dd59f +size 432391 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c387d9cfcf..d99c4541f8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:97f95e957305850e823be945d646268fb8d5882ab7433ef8efd9ba662009cb8d -size 614093 +oid sha256:3aa0bb261e5ca08497a0fea7b6ba9e71be09f4988cb5bde0c5262d28a4b4a585 +size 614191 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8a370583d1..eba2b927bb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4e54eff87821b773fec60170594e1b961c92de1d2d62c9ea9c4b6b9c182c93f4 -size 589967 +oid sha256:051afe05427a3e33af8f346a0e3458e70845791413d651e4956e3cb9bc6fc79f +size 590065 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7eb7d6a864..03884d1c0e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:95fd9d1f71cfd99970baf0af0ad3f8bf7abb00c9101b03dbb2ac21b320ec9326 -size 535429 +oid sha256:cb16011628c801bcc2c175018f28f3915d0e82d1e9a9a253c03f2fc620069369 +size 535529 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index c9b395bfe6..c4778c50e5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c19b9e44e9494285b86bdb20007d0928ac5afcd0d7bac698d032a4b713a095ce -size 514115 +oid sha256:3447ba4f169772299a793754b0258f59f95e147b97508aee402c725b9f5b1726 +size 515003 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c2d16fbf9c..f932d944a7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4bd59a95aebba48a184b244e444d92553e605309f98dd61b19f575d8d91787a0 -size 605059 +oid sha256:f477377f1dfd275ae946d0c2a89d6a6d24704d6ed15c680a772d3c49302e50a3 +size 605157 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8eae54d2bb..357d85d628 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ac90800d6b98e61b285b81139fa3c5772933ae43fa60d545db49625ecec5e94 -size 583645 +oid sha256:47363e4b61d9fd51af5ac3b5722967da525d23e155147936f755ce0204dc322b +size 583745 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 50be24f401..fb2ddcd0e5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:684e0cc7fa17eb49ca21b6c8a0d09912db6e3206df4c003c4fa5867341e18e26 -size 526939 +oid sha256:75cef427c1f30fcac36dadc23d79d7b7edfb5527a5c3e97ef079b5fbf13c3c92 +size 527037 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6a3ccf7c2b..2e559ef04f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c3455f4a3627779d9e5d35da5625931b864e3b8293b401e17814394d78061fa -size 506315 +oid sha256:f13c6399d7fc85c60537e683275d480dedffc90d5ceb78343701b12d28feef90 +size 506413 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c67940cede --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f02fec56be183207d230beaf2db9284b1a04293c1875b89b591ce551b2ef839 +size 706416 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..11277ad3a6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a36bb301788fd2d3417923d7fb09ef4fe32551e9f028c288eab3c7be8b093e6 +size 623280 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index e6d46dc6d2..45a3c0ab8f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1032a442abe5921d6474266290de99bd426e5729e0d584f93f2b7ee54e2e88b6 -size 608449 +oid sha256:a7ed05d6b24cd71c0ffbe8142beda873dabb593a41247909b70af4efb9225055 +size 608547 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5ee3b6993b..77aa1c8499 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1a4f011ffa502b8051a4b5b1efe9942928de231448e1ef8b2433085508c7f8a7 -size 525659 +oid sha256:473eadc69e094ff62711502dfb98b3fd4d816586e49832fdb46870cdf488573e +size 525757 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index f0008a51f5..b18df6f17f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9cdea5d07460303347c1fc7d5855ae331b065d5f71b0305a8613bdc79f3df623 -size 581905 +oid sha256:37eeee1f1bf798a4ea06b850f2bb3dd8e229005263a0cfd96f23ffc16bd4d634 +size 582003 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 532c54d928..c4db1e97c8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e4a298150887c199266ff3a172daa0e5967ba077e13ccf57630368a9bc54705 -size 500003 +oid sha256:75126c0039cff077902afaf6c0a4994d00a1dd3e6340f4b0d507d4eb4fc12279 +size 500103 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6e273b2dea --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a82cb1ec1c810951a354132148e041a4aefeac5a36e44f1abf02078fa4d00d2a +size 742058 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..38b04a0860 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a81dac82e323a93943cad6a7eeea80dd8898a3c4a3d9153e73823ba49ee0d234 +size 656210 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index b9d91c6d76..29ba5c4380 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:85b80092e3e82e1680a2d5ef7439147d87a9e2470537295e5a0bdc10d418b6bd -size 552281 +oid sha256:f2a181f4eb9deaf5eff7a1224dbb495eca9a33a1aa60174df361384fac4c8674 +size 552381 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index e2fb4e3126..10049f454e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4e41fa0fd47b6a6870957511763b8491459214f4e2af520c1251454b409ac790 -size 450845 +oid sha256:5c174106bc551a15a16c773557f220a19ef1c3e27a62efb58006d2d557b164ad +size 450943 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 3c6d20d5ef..e6b936e3eb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b461ee5870500abd976094958d5a22448733384e2fa7417357b6edbbd54ed56a -size 526675 +oid sha256:058ed69b7291fa738725ba5137332a9d09d4b9c6f76072d5a71c2bd9728f1f97 +size 526775 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6ca64a053c..5ab84fccf0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ef84b9555bfdacfb4908eb695c7063a5ef77f02cf642d7d6d4e378e8a30371c -size 428297 +oid sha256:6cbfa8e596bb5e5a35e15417404847894a2832c51f13123bb4e1899b9f0b89fc +size 429185 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 845a6b07a9..1cf97d83b4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e6fd9c1f4deddf28536dbdec658e98ed3665dd02889d96505eeffbe7dd3bc5a0 -size 641852 +oid sha256:e922e198dc491bab7846eb173d808c4a1fc2ef201383cc3c201507520742a390 +size 641952 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 763fc98189..f5b61ede1e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:745d0abd2e8645e20ccc1a260ec25b68ef43f44660eeddbeb025a74d6fa7cc2d -size 603961 +oid sha256:37bbe13f153eee9e4b3ad2f5132af5e8684f8d754e1d7bf78aba388e024037d9 +size 604061 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 822edda5a0..ff706af0ef 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a468424919ada704f3531ca1c172dfb1c923609171e12ceed4fd770056657629 -size 560919 +oid sha256:79f6113b493cbe9c8ac099676f4d2a2efce7c3a532f65c3f8de2cafd6db60dd4 +size 561019 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 436c62f827..fe2b9b8668 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:399b291d1026effb6453148c68edfa75e84d63e3920213ca65125ac425456f62 -size 538125 +oid sha256:27137893a5d20140be67b56a1df62189edee9b8a755ab905a2755bb39f5eb4d2 +size 538225 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1f99dce190..1126b3143a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7515083203fce2fb8c0cd8414fc787a64410df48d517eb148172a32f131f20a0 -size 632770 +oid sha256:7b0b5faa59a2075128853d9e47f709ffd24f846c31b5234299977ad749b02736 +size 632868 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index dd31e4c69c..1b337a8025 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:182c049e14aee4238f4788d5f3fffa4ca4ae356770d255223a3e9ca185eefa7b -size 608987 +oid sha256:932e1bc188b6db62899179474e2594259ff01c0e95367c325fcc612a4785b955 +size 609087 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 021cca8c75..ee07738d9f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7399a2cf39c30b1d53bd2e0703c2429417a7e6b5f1abafb757c27a24d9d6f7c3 -size 551639 +oid sha256:42090550a89077a69b4b8b1ac9736196e881d57ba4f73a68b13399068144d5eb +size 551737 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 39cce2ca97..556b47ee00 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4fc001c1ed681c81ecaa575ebfe706552853f89d57295cd213b9649d22c84c27 -size 530769 +oid sha256:42e12a388fd63082cee0177883a6e860e03024c1a4390828b32b932e42a99c84 +size 530867 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..790d4e10a5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6c6db41c9ed7b0eb23d95a52801a63dc18ce5ca947546a2fc87ff9676376fd3 +size 793770 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..146f9fab59 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b5d579a05e3e0b607cea318ff531a9d6b088c2fd91e087bdb080943a741f068 +size 700126 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 54608def66..6031d0e94c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1fc7edacdecab48635bf432a41443eb76bec84c8d6fed4cb605d0c817c6c6ade -size 642030 +oid sha256:ffe24328981eb6621c956e14a008d9ba702ae537dd56efb49ae5f9a6fba13968 +size 642128 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 16e0fdfde6..a3cd2ebf28 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:548cf8543befe5b2752c73c967ddf3d27c2299d50d2c36187f56c3410094462e -size 545131 +oid sha256:e0fe58656a73de0a0f1fcee6889538e0e34b5609412d0469ea0b1cf463c81a59 +size 545229 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 12b02ebf15..dea4dd879c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43a458fdd85bb90e3b0b30931afd478ec151a426ff05a341df4a19de881ff6dd -size 611637 +oid sha256:f4722494c1e5df3136241ec69b691b6d8be4bd10fb26e8c7a260af841eb64276 +size 611737 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 933b8a1dec..53fb0fc210 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2980a4933c69462f31821103d9e1fe5cfbc674f82751ba4646ed4e73e8ef0d22 -size 518833 +oid sha256:544f18af917b996cc1adf2188e980f946b92114501677bd465b02ede6c71b9e7 +size 518933 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..063e5c0fde --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9adb600ec2e01506be3e0fe948161480eafbbdf6c165be60851adfdce4439a48 +size 822358 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b6185caff6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f28556dd56c653742349c49fa9018034407b089de3376ea8280c96d9134d8dae +size 733944 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 7bc5454bf3..dcd3b6f5d3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e826aff150e2f8c9a3532b90256a666d283f3d46a7e41613b192305ae900127c -size 576637 +oid sha256:38717cf358c13e6bc520b3a3ff31b680b15113cd46f1cac8417d2d77d9b82d9a +size 576735 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 600896ff27..6d65a676f3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cf431f00db0c123a74e5e5624f5fd38ea182ce14a8c822d9b68e54099b5fce53 -size 473769 +oid sha256:9997a01e7460f7df195bf26aa7f83973515b5095cabfd6be2eb02633b627446e +size 473867 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 1e30f0e0e3..6f4d423d2a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8dd6356ba40161838e3216797d5c780d1ce49cc09b03c9b16b22b5f853624141 -size 549797 +oid sha256:53d65b5a3909e42060d6a07e7682fba93dac552b4816d46c77a9fb08aacce028 +size 549897 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 50d248a397..c6234f417e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc6863187d0157a5a5bf5fd7dda9e519788162691473d4977a37d387704672d1 -size 449643 +oid sha256:f552f8b486f4206c22c4c849280ad1b4c255a3610a8b27dd871183c18039f073 +size 449741 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 66411d7864..badd850f4b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aff37aaec671a9611e552756226c17c2d22c757ff6da891f69c40f381dd2f021 -size 634700 +oid sha256:054e35c6838c5ebc27a6a8e52dbb720479b6e035a4a503ee4c00934bcd3e2f78 +size 634798 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index de89ea250f..3b6f160983 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f2e683a33080c701f48caff471710759c1adc140c518a5b3ca3c9c9b530c2391 -size 596809 +oid sha256:b41eb6b4861a6acd9f5443e0bd732790c7a6fe3f05f56645aa14a2b56685016b +size 596907 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index f780b79025..a05bcc5690 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:48618527ab3193ec923ad49d9258d4e6b066d28186390c8e2e100c65f7771944 -size 557763 +oid sha256:6bdb38daecc353ca59dd9dbbf90dfe458c89e214c58462256e42f1c89f671d22 +size 557861 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 31ca9f0240..73ff3717de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:36bfd963630c8463e1b25a1cd9c6a27a26ee131015d58f65e40f0eee709403e8 -size 534919 +oid sha256:7e5ad6b7f045a5b42c41524fe6203176ebdbe1ff8fe347c335db02c867226a54 +size 535017 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 05078a9070..ef747b8a20 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6171ec726d609acd2361d4d9f21964963ab34fefadd6ab2c7926b5411728612b -size 625616 +oid sha256:d5e7ff86c4e320e48c02af2eb6d3ff4dbb26106970fea16db3a73e17e003d617 +size 625714 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ab6c9c60cf..f49ec9dd5b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:334ef7fef45c919a91d00098f117f730af4910b0a293164aa63808fb7dcdea3b -size 601835 +oid sha256:3901097eab1c7ca8f4ccfdb2cb28c79b2130c2f5b0954fd0e77cb254f173c398 +size 601933 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5758096464..b7141effef 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:64f04e1deb8844af73e37c9e227737112577112d72fcabb488d5836c8970c8a6 -size 547643 +oid sha256:dc1451a855ef6106ced708f8baf2f0d31cd031703a5200c574b5fe9b969a7a40 +size 547741 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9b1e5d0b1a..bce4a33734 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:016b8e052565d3fe1d241738c1d8cfb895618a15f28b6752a58ac46cac3b48af -size 526823 +oid sha256:982c0c19acfa6976c65a20fb56616aa9551545d1a80db8f10c7f20ec408b7003 +size 526921 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..28480b9700 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8068e581f107bf8958af5a533e5848a7d153e99015e9b402ca9bf098ac73ac18 +size 786616 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1efcae98a4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:199b1860d6d93fef3bd7224ee9cc74e3a6efaa5245b0784acb42042d52d64f85 +size 693022 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 4dd8f71177..ebccfc8cf9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01e26c3c780395554657c849a21ef69bbd29d297c7497018ee6fbbf6a6becd3b -size 634876 +oid sha256:5765d9e0607decaf19ebdbe732750f2e07a167d296f4ff032a748d801304aa1e +size 634976 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index af879b72b1..2666219672 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b8cdd4f5b744f39a4b655586f09173b63361f7720b83ed6ba41caa01f7a87b0d -size 537977 +oid sha256:b00c0d7f411b20f65cff15432cc2c2f2d4a46dafb0ab12a57cbbb4716c1d3336 +size 538075 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 64c2de3799..f9d6a34781 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:810aafe4b41525e5d68330c288db359de9533e3e5ba194fc522ac44017150686 -size 604485 +oid sha256:f0e77122402b7891595c31d0294a4d2848065d91acbb5a0d4e9dfe69ffe01d53 +size 604583 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d6ee7adfe0..4bfc092b54 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e5f56ca51c30911e351f6786f75ae991aa422eda8f4adcab25dd0f0e7f97003 -size 511681 +oid sha256:68fd5801da9a5876c386d1ba07a1b4ab999782b148252facf925fa67b6f9311c +size 512569 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..5b13a39edc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82eb5e0021c29f09a69ba631560dbc52239021e844acfef7f2a3b0cb92e369f3 +size 815994 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7b67930945 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6810499bfdea47e2ce3de84913c7b40e1bfa7face687511034dacf071424d07b +size 726790 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 79663e3b69..7955758b1c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cd9cf2bd836cea3e43e4dcf73ab999e391f94c149359534b7edfb7d5ebf17a8a -size 572691 +oid sha256:846bc5d30325956cbcef34076baa8808be2c5b58c63410b7b4713c778bf0a050 +size 573579 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5d63b1ceba..3882633f54 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f2f1e3bbe4003c0780c187918ca2ac6c9efa678f3805a4f8131dbc2131cb04e -size 470563 +oid sha256:fa5336c9038428103cd5b875fd690f736847e9984f207f2bc4381f54998f0cc9 +size 470661 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index e831b3e934..b09f898942 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd96567f39450643999ac35943f06959b43c8b24ff50ffbf02aaba8a4cdb73a6 -size 545605 +oid sha256:3fd534bdc3674b87280b28abdb67c99df5ed5b287bcdbff3d411a8871f851359 +size 545703 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 389bc0a6bb..4270c194f6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef05bcec42e96ee80900edac14058076f2a179dfb62ea719cef701c252610096 -size 445697 +oid sha256:0d928b1713ad0e2c0fe1a7a4a331e98f8c5e29b95aab8134dfffc5a3ba18eb62 +size 445795 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..d66e6ee0aa --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:205d47ad3de4f53eda13abae68744d824a45e21488b487859c19d67dbc765e50 +size 710328 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..f1ed78250c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adc2f8da6e97603ad1d8f0eff16e76d40e49d3857724c8c22caa8b9bb45bd2e1 +size 618512 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..eae69840fa --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f77e4dab9fe13c5421ccff48810cd316468865b0973c0144ecf8c65a65a4d21 +size 707860 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b779ae3390 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e639f2cec259d19a07974f8eff51e82017a299634c88b077f810105e1e5f6f42 +size 620730 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6d9720f119 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac37a4cf2b73c6108ca08f734c9989792ca304d93bed6b4b72705ebb6503f3dd +size 816102 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..acbc552f1d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1d3b7d2a919a3bd5725845adf66f684e8d054fe352c48440238df2e8f8a348a +size 723890 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..e6a283d8db --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1b43ea9ca4481c6d8d4894c75b1097764a35c54b1dfab42c77a222dbfa1ffc1 +size 797058 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..cb0df9d4b6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa863dad23eec784434263ce98a6ed0a0d6aa60abcb31f416c4b9d0d0a1c3f7f +size 703218 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f5523d4707 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e565413eeb689a267ebd4e7b09114b91d59726043083c4ebb326072cf9488ada +size 814126 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4931f69ff2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fae98f74c3b722c63ca2808794d144834ba77b597a02a7fc29407eff3cbde0d +size 742388 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..9037082487 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dbe4a1e169fd5f1914fd28eb54a7cdb4a3a8220be682cf3413be2bd6bc48a42 +size 792764 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..3530900c33 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb3a74b3cb1c895a0bc063f9da71e3677b73797e650a4de1bfe1303286fb2988 +size 715254 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b355ea0f60 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e724cce9aae65766f15c457350160d9c949fb700ff20d2b34fe25f678a46c74 +size 711122 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..92ef3c4ba7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e2d06a0dc18ac6c979e811f83e296d0474b94e9d5c80e7e6f90bdd7cfedbfa2 +size 619304 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..11741dfe75 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:308e0b9e284ab7a6f068a58b0768dd5b8386102d869bcdfef6e9b1721b4b75b6 +size 708654 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..2581299498 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10380e1efd9021d62659e8fe5ed5c43c532ce76cbfd3997bbd472101320d9b7e +size 620734 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f15cbcaeaa..03121800c4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b0bac4894947ba1e73fa60530bd8b88534d4b01304301defa9f315cb84860af9 -size 924488 +oid sha256:7b854b5407ee23a6072eadcbe26e1de2a8c278f9d3ec3c17eb02c3ad76c95389 +size 924586 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 8d0456f549..dce36ff446 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e3f796537a38d374db15408c9ad3e7ed61dc0a1780a7143d93a066723cc7a695 -size 861240 +oid sha256:27fd261564546f29ca31317ef5f966e2f183804921cdb7c8593170344b034cab +size 861338 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 316c4688b5..0a8a7d1f57 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a1eb2494ba7c81bae7ca332d0c24c8b1debcc9aad6e037b86063be6788c0cb41 -size 815116 +oid sha256:98c1141ca99313b2d57be4263138d12bef68f85c063c1039674493a0b88d3f3f +size 816004 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ae05531c73..a10d6b99f8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90942c2e5fe90bf73dd1346abdd02ce56094602bff1722a8500c14cf256a015e -size 877964 +oid sha256:274cad15cc0ef02be2323d07923dcce27163631deec720290d907da0ce4f499d +size 878062 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 0ee6b97e68..338b327b5d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df050bf34f9280b93a1cfa6c4204ce0a34b269cffc8ccc54436cc611142b7b7d -size 818960 +oid sha256:6a70c9e7f296347dec4edbcf675e322f634f3e359bf9062a4d356da39233156b +size 819058 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 9628c5e533..ce093b6495 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:848866af96feb85cc1885003e8947a70f06fa3d050449c90c5cd40044b8b77d9 -size 708612 +oid sha256:9e31c18081d4433bb8564b026c16962daed7243876dc8e4f477b3644777c7aef +size 708712 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d024e5624d..b73bfd2d1e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02e4d3fe7b788c6459a6ffdf40d9d997518ec1d65032d3c9d8a8488160de2f1c -size 912838 +oid sha256:f7179da93f3732b73334d6e04d09808db65b3cc38f3541aea903a569a23577d0 +size 913726 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index a7d078fc78..400a2cf54d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c89f1a3c41c0be421315f84fb9fd213c041640915bc1b86d6abc2a51c1753b88 -size 850332 +oid sha256:519271cef1fb79a16829880608dfafaf5d4453eaf06ddab0da61a96ca2eb7b65 +size 850430 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4c273a8367..01e8257341 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ec475ab414c96f518dbcb619bdedb4464566548d74fe6174c5be35678bf0c875 -size 872828 +oid sha256:a8ed8cc85a5a4a1315288a8fd1fcff98222d24ccae8ee9e47f59d3d935f35cd6 +size 872926 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 565a0d96ed..fea2a1cee6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d03bb9f89ae5c38b3cc88f48d32ccf2b6f47817d35bac1b2e31ec6567b3127b5 -size 813822 +oid sha256:d56f4c8e596bc79513979ff95ea087710e2bdb0775583377b209a1497be432d8 +size 813922 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index edb34f8099..5e2aa2d7a0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:40b64490e2864260b634cc7183341a389928db30a2a402bbee918491bb61f639 -size 919928 +oid sha256:b2be68733ce2bf0e2650ee0bb12f6e610f56570ead3c5f6a2a7ad726d9c9fd76 +size 920028 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c779aa4e4e..2d916debcf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e8378304644094073041c8b4a609c963c57af8aa3c4aa4b2df658e26fd57f97 -size 830282 +oid sha256:210a366e12ac4064b26a13d774e30ef6f9796abecad297bcc17178a0f3926edd +size 830380 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index 9953361c4f..bb2f5d7e0d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:773525d1bca58d09b5c89ac60717f593c88a4445785366a3babf2c33c9590084 -size 860480 +oid sha256:bef432cfc383257cf21754e29c3828349c9115381e3ae0af2a796226b19e97c7 +size 860578 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 010960a261..e1095f10ca 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2a0b4e862d0b753b61e7aa2c6ff452664ed10994449984ef284b3d07b9dea5e8 -size 767578 +oid sha256:691e83ddb2ba39b1f963a678d72572f2b66fd17c9b164382fc98c6efc9bb583f +size 767676 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index 35652b3cf3..25b89a9420 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8498557d7815ab5dd86dabb000814ad158d38d250d7f65855ac9319821f94c92 -size 727232 +oid sha256:9887154be69146bb849fa3f1717d60f00bc23c8d4128c307170276dd2b23f773 +size 727332 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 8731fde108..7762b12e43 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dec83160b44f13e8ba65e1a54c4d3bcf1220c6456e8c17c88d2c7b4c82335110 -size 672564 +oid sha256:140ae8df5543c014288474ad14feae3c4d437131fe0877c58ca65c4c361f0ddc +size 672662 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 37e305cc47..0aea50c53b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5969693d0531be7ace8d54fecf6dbf6ce4c5af8f226640e3f915f8af79de627a -size 882334 +oid sha256:2be609e8c121d5c2d5847b4191597abeb9d74ee1ef2c386dbb58996d952c7e26 +size 882434 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5dc2ff8deb..fb1df57ed9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6fc2c8e5595c3a16d965fbf69078e263789bd8b11dba5337c3070a04bce1457c -size 791898 +oid sha256:0568563f20ba1fce7b5e5170b9b113695c9d8684c63157ff34b963c17d6b3ffd +size 792786 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index bb379942ec..863d93bfbe 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a88d8ea8fa4eb227d48ae4cdcf2b0b9e0549d6f7c0fd395b7b658f100881d092 -size 827178 +oid sha256:e80cfccc22d6442fa249da99551067d961ed8a080452187dd98a43ab21227d5f +size 827276 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 514541a78c..cccf8c1550 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f196d8c87a940a0cf95edf5e232995343203acab428fba7b8cacb50a6b9150b4 -size 733684 +oid sha256:033f81838dd0e54a0651740b595aecba9bb661858a946c4f096dcc08e262340f +size 733832 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f743cf2b47..cfef7a3240 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb772b87e1e6df67263c65cb7c72a0389329f3b3565ddf5e6fbef9bd41d01f2e -size 878656 +oid sha256:17000a859a72be3cde661471cd50d6447d9797ae44b9d829611358ae48246f5b +size 878756 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 938add8a54..f7d9cc62c4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b0ecf99c145065582627ce664a176a40efa6add1fdc3b4c6fc4cf16909f74c8a -size 838990 +oid sha256:87587a1a082ff1d5984d5a7780b94d6303bc75c717d9136466ee569e7e097160 +size 839090 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 6327b63a1b..9e7b31fe1c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25d7a4a7aca17c0f30f636a2f0473d6aa8947e6959470647d8d67fbe57e81695 -size 757296 +oid sha256:887b850bdb6a0c0d8e4ae3459c0b3cdde0d87a5bbf16360c9929136792f43e01 +size 757396 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a21fe3a349..94055e1b1c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:87376496cbcbceadd837f751ad33ae0c0f6371598a4754f97673cd67b7ac284a -size 832084 +oid sha256:1988e247711043bd8d1995d6f10aa78e30dcb0dabba76d2b53af16e1f080c660 +size 832232 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 92264fcf64..aef7bd0c2e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43d046031602607b57dd5c06672ec9977547219e7b118d8d293469a180f8dd8b -size 796760 +oid sha256:1e491aec88c7e1fc78f0387eca14fd013760b5803a7c7968b16521b6a848ab69 +size 797648 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index a3af2fed35..3d778b9967 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bf397cf5bdbf708318e9dfe4ab63655fca52742bd09921b14238579f9a70a817 -size 650892 +oid sha256:5882c07afe45fa5c87746b3ac8dbb7cae6c9e8de0a8ecb658d779681706bb8ed +size 650992 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index fb20da00ad..f184cda8c3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43519260fc18ebd165856651f4b90a4aea8d57af09f2e0fe6a7bc04ae663ac3b -size 867748 +oid sha256:0c5d76d0593673ecbc94c31339ff93273a53f7fba54d47793dd0d5aa78a4157a +size 867846 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index db0997f0c1..f50c9db96c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:85f1f1dc5b70a01d5dbb61b623108436289592edaf83f94dec5d2f7a960083b7 -size 828920 +oid sha256:170bd3ca8f37cac7c13fb12f55b2d0529bd1ad44be046e58c5e6479919125087 +size 829020 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ebf34220d3..7b68ba5b09 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:543849ef9112061938d9ab709b0b7de594079efda0390d6ed83964195b41e070 -size 826948 +oid sha256:0c09d29e0848d8ead674ec2d55963ce21216bbf9568bdedcb62eac4e51ec2e9d +size 827046 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 260d1b8ffe..686e5874e8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f71415be160691c3d02c7fd7aa31929267d75d0ceb4ccc3c79303071ed58ad3c -size 792412 +oid sha256:32fc370029b71ff4ea83ffc76a2ebec5532afb90a7b416b161fdccf4d23140c3 +size 792510 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 505103a888..51cf1ce504 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dabafb943c5f834f9ec3ae76a6c1892b23acfd058c6e9ba6732ac8d7fbc8f9e7 -size 873062 +oid sha256:ee6746d9b48ec58ac298ccba838bfe9bfe4c179d7e10421a44b71467898259c7 +size 873160 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e77a5bb7ed..0e4043e53d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0e068510c66be22511e7962c0c02299a1a85d29ac2490969e71b126645b95cf -size 785192 +oid sha256:c835d2a4e14c69efc5d8291f2db73d5add7823ddf22322aa037a3e28b1a50b1b +size 785290 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index 4bef0df896..6262afea0d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f896cb745e987529f083f3c2fc70dd4f5f01a31ac5d52b708fc0b2967fb58d74 -size 839958 +oid sha256:b3863c86f0e826a52e25f21b66b264c7ce0f35831de3901ba8673fb9feb00524 +size 840106 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index f96735c711..0438c5c49d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3a01dc3161c99c3f5c906bd0d48b586311dc107f9a970b99f8aca9e379eae34b -size 746166 +oid sha256:51181b2676997eb6d397c4f6653888cedd3ec3b814a3d57431ce089b75204ddb +size 746266 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index fecda76810..048fd363c0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a83abdad6c74e74fcf8f151fd1fb4e35831943c655e56695702cdd1a49e501f3 -size 678886 +oid sha256:35be1c2f01f1a8a532c866dd6e1771f9c6503d4bf7486cfa15498cbe7d671af3 +size 679774 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 057dc1b04f..2f8f44fda0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09c712765475fee0bfb3dc919e287d9e24e09af7f2300659a51bf7beab8122b4 -size 615681 +oid sha256:e4e482a83fa4f20e3c5513bf1aa857a575d8390cd5bbc9a498c80eedf6a18870 +size 615779 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 8d41309acb..3783980d4b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:099d2e3e248afba423d10cebc9a7d93ee674bfa54a3bfd6e08a0a123a1421d04 -size 836258 +oid sha256:1d31371c842d634c054ed4d912a1c953cf5596dc036ed5a6a8e16d64cf63c21b +size 836356 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 04c17d4469..9a6d9f3b4c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8e4040f39d3e5835c9b0cb9ab1781f007fb51ce06cb6bc3a12fc5f753aafeb5e -size 746858 +oid sha256:362823f9796b168012bc1288d96c53c2ee698a038304fc4747232c60c36e2944 +size 746956 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index 922781085d..02fc33d18f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:371db73432f1346a2e291918f814b3e5b99fcded130a6c3a34e4c03454c70b67 -size 806704 +oid sha256:e623beebadb08a9a8a0b94f2af19b911ecb1ccc9466189aeaad0060d4061a561 +size 807592 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 11fe6fc60f..89445c2cf1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:51bf22ffe7deabcf71cc8549c6603e4fb996b3413a072de38eb91bcddb509c3d -size 712272 +oid sha256:9324ad2511a01512de546fefda7cb82b052a3895d94a8bbf150411e919978fe2 +size 712372 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3a5c807788..abe7c802f5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f601cfe3db060c40249528168979093e979d86eedb1b82d8d060d346bc82709 -size 852016 +oid sha256:02a2efd9854dac63215c682e5c675a820057a7a9c82a3abd588691d8333b5bc6 +size 852116 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7713fb3dac..48939d92eb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f23b00b758f331167f2fa3b4e2b2f04caf30896ce0405d62608cab553bd52ab -size 814226 +oid sha256:e726b32b0bb25fe2fa40a4aaec6f339a6328102cf19c661961c32038cc80cbe6 +size 814324 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 95bda6a152..298053362c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:81b8a0d851a2c02cf80f3a755c3f7fec6a776b9e60ba45a52816d222f9b8fcf7 -size 840466 +oid sha256:20c5a3bc6a28626dc03feeb161e79794b03b55649e7c66a969cdb00c73b04399 +size 840566 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index dff8a5d3fa..362910d8f5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:056788d1f4acd0e849342799dc39cf7c182434dd6281743fd4ac10a0e406327e -size 810224 +oid sha256:c942e7d9778a12d8c7c1b74ba7f0557efe05e43940c2dc678c39913aae017326 +size 810322 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 0d9b6eb475..5a34bcb8eb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:44a45d6f17b76ed675b9a8db95ed0844305d4a3a3f4941d94c793f47a5b0d506 -size 861814 +oid sha256:97ffce6720938654beaba2cdd82423d4b974c30f61685d485e53f9d52ffa3dac +size 861912 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 66fc5f67d6..056cad249f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:65eb774ddd83ece83f14192c1c523a60f01e5609e04fb3b0ad5367e72a9baee9 -size 761906 +oid sha256:067bb1da66d2421a4ce34fd2feee60417ea8366dcc94c20bacbff400dccc7214 +size 762004 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index c1e61e6bdb..a5d6205a82 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:720881e3cfaf8456b4467960a2d9b335e222b5de6462a43715c85443f37807c4 -size 832854 +oid sha256:f80eb77c0731462cadda5008b8fe5efd529ce203f4f34bcd395d1505df71bd08 +size 832952 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d7a41846fc..39a41b676d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6def72ba4707200c7cfbb8bc99a118226a218e0da8991c8b2034135d762af6d5 -size 736350 +oid sha256:c732495e8fd87ea45c87c94ae5136571478df09372322e34e0cfa5f9ee7ee6ca +size 736448 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d9396aae40..635b9c929b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:66dd78a5c9b466108d1e37913c2ca662def230be52f1180619e3e4b05682fccb -size 999968 +oid sha256:dab492a6b1f1c613a17ea2db8e5e4de4a1a0283ea95c11da2735b588431671da +size 1000066 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index a23839d507..9f889fb620 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:65c605faf5208fda81b2aa734ddafadb4dde76106c7ade8d01f121f0c8e5d81a -size 917282 +oid sha256:36541c0a7a234ff1537d283dd0bea3c2798685de35aa2f81eed838b12a612f74 +size 917382 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 4f0c463b3d..dcb01189b4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:568c805211208f248435060e3c3c87164261fc6cf10083a7d9a4df2a6b446cdd -size 932536 +oid sha256:cee3cb5829a7f2f809a3e9eb3a964707cb0ecd00e00fcfe8d286722ae1c4eee8 +size 932636 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index f38ff69ec2..6e1f42df12 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c129d0c9e8234c284fce82a983a4bf1713a42b2f1645f9c5d9434d3b57db8f52 -size 896712 +oid sha256:0f19ff026b726b955653b873ebb10030e18cd3ad845e11c0796ad0a59b2f0a32 +size 896812 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 848efdfeec..8c11162ff2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:12bec7ae16d62c2018a3525e3fbc1f0cee2a28f4a300e63bfd4e23a8eb659a14 -size 958230 +oid sha256:976b88d176724919ce1d9f934ff1f4b5dfa587d1275ad5e68bdc13e78c1e6bb8 +size 958328 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index ab6db0854e..605592ca43 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f75381735eb03d87328ffd02b9476fbfd34c9e00c536501c50d0ebd5eaea4150 -size 879096 +oid sha256:6b14d83aeabfb59c6866fae5e6e4d09ccf1a98cc1f25d8bf8baaa1c386be442a +size 879196 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index f08c102436..ae6972cdc5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e09ffe42be5bd9f0161f9fdc91932760c34b38ea11f4bfb973dde91d5954781 -size 793276 +oid sha256:0e9906f945fa13c5e7b941692fc9b387cfd727c6d54c496da1b11f31f36031d0 +size 793424 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index ad7a3480cb..16b3d36078 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ec937a63dd077adc1d6beaa2442ccb7dd446d3778329857d2211e5bf97df7ff -size 776052 +oid sha256:f91b64a45d31945237d248feb2d56a094cc9f30c8caccc42614d0c10654b7351 +size 776150 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3b677c2bf2..e134ca0647 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c750eea6c6515c50f7241d9476cdd1c91515506eb6831a2f60fa80ff5ff3faf4 -size 984174 +oid sha256:b1a8bb15bafc3a45930430f337841a30241ee7e561b5041ee8ddaa69ec0381d8 +size 984274 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 99cc6b733d..5a382aa865 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e73712586dc95dfc39a3406cc2715296a0c7fe54e669171e5e448cff7ead25ca -size 900750 +oid sha256:3b59089c60d28d163b70ef5ff0a01dcbfb516c4b0785402e6ce489c205cf84be +size 900848 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index eb1280bb6e..13efc5ebfc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b27935e61fa953537afea0f299a3e47a8b86c792ed7a8ddd770ecc0d8151b7d1 -size 948948 +oid sha256:36f4eb0922b22862c1721a2fab710a53340997ec3581a916c244e9bebe740a12 +size 949048 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 43b2569f5c..6c66ad1f1a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a5336e36b6735a677ecfc1f21b2e3480e121ceddfd1406a6fd21ef143c665beb -size 869816 +oid sha256:cafc4b01e6daefb066d72c0ed80717df9ec9c1de0ece656c91f39c6d309bca09 +size 869914 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 38eac3b3c2..d4c33522c0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d9a61589c8fd5915f833b24683bc1b998b62e0fc1dbc7add9775a56aa63d8b23 -size 998912 +oid sha256:edd47c7c91113e38f3b370f8d3cc097f1880fe7d21c900def3ce21e31b7d583e +size 999010 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a771cff50b..ad8c1d9f73 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d11431e9300b36d3c57e3d196d6a0f643e6ac5ceeb38d6a1ff67c6b2fea51e5e -size 900828 +oid sha256:edefb9cb74c979b7b61fdf04898ae08fd42abb95fa7dea0c4610e7632a9a4349 +size 900928 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index e87c74d431..dfc8b239a7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:75b538ef0f17881629e371d35b256ae79bb025c3f549582bfac15dfa30527197 -size 916078 +oid sha256:1cbc435fc41aad33d6395bddf2d889a3817fb0cded7dc7e7ae8bf5b18c76e4b3 +size 916178 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 9187fe4d85..0279054ef0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:af1a8667376d75fb81a24eddb54552e14a5fcdee3250119b4baa2b2f496d8e21 -size 818046 +oid sha256:4910d282f3c7216f4562a204a74239ffc56f992d71a402f8343abe1e26cc8ed1 +size 818144 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp index 3d78f1ff68..f215b4ab55 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6021bf78f664864eecbe548bf33c85f61662a47c04e9e2d888c0113ad4d0b42b -size 849242 +oid sha256:a83cd112f5d915dc78b9d6fa75a7edb9b48a2bab59e32a0af543060b7bf82568 +size 849340 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index b23b09c121..0361864a9c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b2ff29aa7f6c19de97366e1a3f09f6cce89ca9fdc662276dbf187847e2f4fe1 -size 797680 +oid sha256:8c8aab7dd62eac1709a8edfc68d5846bff691004bfd4d2ff504deabb368eb503 +size 797780 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index f2ad9075a1..5fe1aa8a03 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e37fd73817e35ed393639a1a20f1987508a5155a10d582f9b335a5ebfc563dee -size 756832 +oid sha256:a6b1615bf071411c823c913d1d3b393a1441db179fb766814113356c61d6f00f +size 756932 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 9fb37b5e78..2923f8196c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e133937c00dd666949b5e5cc06c32ea43719f71263fa359f33671a63b6330f9c -size 740002 +oid sha256:772e769f5a5d734710d42e6aab279b226154f5414d27e343a4d48d0a607ab3a8 +size 740100 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 2968f7ec95..36127755cd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c11dca7710874d640a829f8e53d65625f01b13b6d3de9df78aa8ef20276c6f56 -size 954706 +oid sha256:40979f100547b5f54c08b66129a3a78e51e28e686ea9d5e130bbb2ed170ac4bc +size 954806 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 171a9288d5..8974af4ebb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cf2f2e296aa73efc7f89d032a1f57072416dc7e8c5040841dc1b6ceece4c2a37 -size 860472 +oid sha256:008adf5ead892ce6e5c690fd78cb8d5418cb43e171990c4f33c104e589d9bb03 +size 860570 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index df3bb27b7f..e27a59333e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:529b9e79059ab0f5648c42ccfffbc013d45d32aa11f4174f543b8f344d055fcb -size 875524 +oid sha256:2a635e521364dbf06bc0632c83ae23ea21e06f200d6b18138b384d217812755a +size 875624 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 799472184e..731a2237ea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a434bc3132b195a40e407fb4701b04394cf9f968f51031405b6ae9daa6cf1c5c -size 782128 +oid sha256:d1dfdf885d13b6b111ebe4a84b65097f785b884dd28f508824c94eb4f1eb6821 +size 783016 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index dc9ebfe9d8..8ebcd6edc4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a863565cef81c639d326a65d345ea0473744d0abac1c25cf163fc27df968b073 -size 944270 +oid sha256:0d86e991565701f409958b7c0c1e3c20923fe0fbb48ebd0e9a07807b0c5804ac +size 945158 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 31bf47f08e..1ba772872e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5ee50439875c9a328063db42ead9253a46ec2b215a4480535d395288225628b3 -size 891926 +oid sha256:84696bf481547db02c4d613f3e0c6499072011adf049a3317e02caf2e04ba6d7 +size 892024 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 60d9742b22..acf2be225e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6c946c650ae563a98b324856c6e7f859f991757a1c7fc85a7eaeb7f16ac86b3c -size 900816 +oid sha256:f3af489d19278a3a2e78a17475f945185885191f2aedaa1f62fe7fc6233c4516 +size 900914 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 86f936a33e..338d6b2126 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:20af95833ff7b98040851b12c821a71c1de47818085556b56e2b170952a1d73f -size 824636 +oid sha256:22c02b018a296c5278c69bf23c4a14e57a3c75cc969597c44cc5ab4132e2f3a6 +size 824736 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b20f7e210a..7743e65ca8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9770c2cd74f03c2bfee97402a948205b0ba0e9a3aec9e4907ab084d30cdba960 -size 902532 +oid sha256:97b28333f91f2ce262c85f6468bef316d14253c9616a8e806e7c2e68925994b8 +size 902630 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index ec315aacbc..9a174d55b1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d266622fad36405dacdd7fc7919aa594a6f9ea8b9e4b5f8dfbc3873ea52e4537 -size 853690 +oid sha256:717705a12eccb52d656def86f2de30d8fe2b0fe56a3fa243f4e3eec5e6b135f4 +size 853788 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 37824d4f0e..d9b457b4dc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:538fc2450f1556a778598ac7ac4d32f6e675634a04e2a20cb22395a05fb689ef -size 761604 +oid sha256:643c0ba6cdca1cfb5be2a2e8f6f2d285cf7606d625f7b7205aec086ab457df05 +size 762492 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 3dffa32224..468b9fcb8c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3653097c159c508a0d8321fdcc3ae56bdb93c04206eb4f18b9e60112a66f5856 -size 704764 +oid sha256:7c4096d251678c32dc4c90ef4395ccaba23181717ad9ad88414273eb02c80fad +size 704864 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4afd385bcc..36496ed798 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:37e4a0f8b8472f45993da19968162588c1c37548b54e545951690d2d8d8e4150 -size 929266 +oid sha256:8dfe091ec64978741010d3a122ede19ec00eb783cc991c13db5542ed5a13f83d +size 929366 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 9f84c418c9..28eb89715a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fd376e3b90ddbbdf675b4d6cd3375ebd68f208543d7420c7e0921ae9fa6d4b9c -size 875344 +oid sha256:15b7d678971b59e16c8508cfb28475bcddc5b3518649f0faf2f9e02f2afabab6 +size 875442 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0d6d974f07..ad063f9e72 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a7ad5a37a49548a8759b7c98b1349da6978e221a081932ef6646a3ee3103f7ba -size 893252 +oid sha256:7cc221422251bd8a6b20e4cd4e803fb1c835b8ccca910209fda2cda18b571eff +size 893350 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index b8fa1a1e97..65f88b1738 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1da48cf5dc6b64183e81bd242dd1f4a56d7d72b9997a5211c23dfe7dc2062f4d -size 844410 +oid sha256:49002781a18bc4c6109a5a72e6f0075803ef4b20c17fe61251f490fb28298558 +size 845298 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index afb43bdd82..99a3b12db1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3685fda35f084474afa502a175088f175d2f6eb0bc1dcb173d15ef0a747ad2b5 -size 939070 +oid sha256:7086848ec330db1ff7abffda427a598d4bb811a60a4c97ebeb86e991bf0183e4 +size 939958 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5064dcace3..51c5e1016b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fc5d7a1e66f867fd89156ee0af60966fd890e9aa894b34175a2334e78feb337a -size 845920 +oid sha256:cc942df74c771c26e1733d8aa55383decebd444519eaeed9cc00fb2369d8bd6d +size 846020 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index fb166137c5..483528f130 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f8bdb8bd940ae363090fb4821881c113da213941f8cd6c78b2bb29e1fcfc1b82 -size 891610 +oid sha256:869041c4c7710e0bb15c2ac78ca4663c12ab9dc82bd9c95e6af127dfd5d529d0 +size 891708 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 9c194f57e1..5feb31419d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:582ce6ffdc31e9a820afd5660ec3e9df2563ffed4afed8bab437be86c559e48f -size 793428 +oid sha256:deabb1d8c6b793a964950db71e3cfbe6a171fb9e87005882ff128d16833f7d59 +size 793526 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp index 24e27dc14b..df25a215a9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:88b831970f16f556a29beedda989f1ad7c9713d9367f3448a82f1385edbea6dc -size 818310 +oid sha256:3f0d93921addee8d61502831f49d9dde8f78ab59dee7f35ab53b9406dd8c408f +size 818408 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index ad0e00a9b2..a6de21d65b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7dc2c041f3e759b27793e6eb3bc4ca3e5d45c28a00a8d71ce1c1e8f6bc232a33 -size 741884 +oid sha256:e480aaf45d8722b36b054cb8e60fa148fbe03d80c8ccaf61567b07c7ad4463d3 +size 741984 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index c679194f71..cdcd8009e5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3c7ebfb727d65a1f1fc623f0968e385353f37ec7d204e7900ba1b55529bbe5b4 -size 725950 +oid sha256:75ff39ccf4581441f416c10d626008bbeebff47ac41e8b595b438687ddae16ad +size 726048 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 61f073833f..10ca633663 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e863d13654aee27b3bccce12a7c47672e8c49bd8148ba6db168e6d142c535a1 -size 670392 +oid sha256:be5273753a953d2fce58437fda102eb95e25529835c46ad464845dfaf38cac36 +size 670492 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 8016abdddb..0bc5c29696 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:323a4bd1db1aeac65f4adc3ff25778c77f178b507e91c031574e17f87560e14b -size 894914 +oid sha256:6e0d9259db715b0a2e58619bc3c6e0eaa3ac4d484bd10c77a10520728641fe81 +size 895802 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 90eaa5500c..a3e7177e21 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:86641100770045e7457fd31f19ea7adfd5657592d4158991fb8632e9bbdb54b6 -size 805564 +oid sha256:bc8285965fa2f06457a34102d6aeff117a761db642c72d49a8039266f89b2baa +size 805662 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index acc613eb0d..f37aea457d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb10369863968736ea12f9cfb564ed9d1dca36b99519647cfeee18761821d2c1 -size 850956 +oid sha256:82927d47cea140092b55c06c21f92ab08b65aedc6f2df574e611d97c8b55880f +size 851844 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index ff049c152c..dc5e3b6936 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a2864e1b0b1cdbc29893dd18020fb3c71c7cbe6fd138ac3b8a2bd40d128994bc -size 757512 +oid sha256:e3bd0227c93287627de7e8481cf86f50cbdda27b5282d6e24979b252fe591012 +size 757610 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9b9410161b..1582bd2cfc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:125bb5851bac77d253aa508d8d446e0a843a898a2ea048e0c8a35216db960931 -size 936772 +oid sha256:e9891e1cf64a23334e60017db2306c85c144ee36a160f38c3e7a2a92ffb48c00 +size 936870 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index c057dbe609..fde99724f5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:465df4a315322e582348c3d46c34caefebdc346f526e8e939b1c4f6e53f16f14 -size 1066280 +oid sha256:ee701f5c66a77a1305218fbd68f1cb17d966208681f4bd1a8cc1852cd58db43f +size 1066378 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a932577c08..31e9227e62 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:735313b4c128db5b2ea51afb80b1a648fef8e3b1497ceed48d642107853ed65f -size 906134 +oid sha256:3d54c9ba1881d04539d0f5c9270b9d0bdd9b84ce536c97bc15fb84fe423e1677 +size 906232 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index e7622d6dc2..3ce8c759a4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e5be5c4347e07810cc3c4e2f6a8210650898c688576a202d8d49acf3dcd0ea59 -size 930522 +oid sha256:e1a9985ffb5d9e33be6a0634fc5f55b704e387934b895d4d14f120965b532758 +size 930620 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a1a859e1fd..0937c45703 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:054c082a0872d423a22058bc75c52a8a4f58e34a0cb36fcd088ae29f45c40967 -size 920978 +oid sha256:e0ea35795d05d46593c6dbf7986bd1f0cbd1d69db98946384067867c00b88305 +size 921078 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b09577601f..959a51ea92 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a622c934e624bfb4aaad48882c4b68638f5e4a465db8bdccc907a670e86e8306 -size 896014 +oid sha256:dad63b6c443e350760ff37ebd0b4450336e04ea1e341252e562fc64741b8368f +size 896112 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 22fb4aa922..bd41faf1dd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b8e4ba287e8b0d2bef5fdf9aaad9ae470cb0fff43cd49440e49debb2168e4be4 -size 933742 +oid sha256:8d03bb7e1e59f4025d32ad9dd1e54ba1347d8c69942c1a5cc8015f719f3cd096 +size 933840 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index bfffabb728..c150ef367a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d52b13381bfbd85b69eb0b7ba02f07743eaef6a5bca228b51e08fc4d81154eff -size 843306 +oid sha256:8804466058c0cfa8d76ae36ae8801f7953aeba1924965882f9382948031961ad +size 843404 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp index 6db3767c5f..2520aaa022 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dec75a9431835f9f52416d6b87a3e6893920362bee1346a5a72e9e839086f0f9 -size 1091222 +oid sha256:a2940c93d3161b97c98ff506188c8a36deb3105d0fe5d57345b8ec1a8ae41900 +size 1091320 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 9c4d6d2ef7..06cc1e9769 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a60a32b673d646e5a7ca5adee5c16cdc1a340ae7f9d6783a942a920040986f1d -size 888800 +oid sha256:801c5dcc06515c322f9be5114595e511ef0794cc6fbd180c2cfccf222e8b0e66 +size 888898 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 263bd3565d..4d959ed24c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:268354efd082c9f5751cb4407c1a6d396739a7aca143c64b36934be06b621852 -size 903154 +oid sha256:80a2996f1dbcd8230560bc2ac8da510b752a073b9ce3108c6bab17e0190749a8 +size 904042 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c7c4199c8e..9a5f5090cd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd4ec0da624ea9c40f8cbed779cc625ff4a1de14d22ca80313117de8b0cdf042 -size 815726 +oid sha256:5288fb2048f225227d3d360b63d56507ebdf79376f1b53d2d1d0accaa3a16187 +size 815826 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9311563a2f..692c527fc3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:99b81d6b44ea30d78a608398c7cd533fa24fa6e5813d59b936cc00007ba19b37 -size 1137312 +oid sha256:3646f33ef27703ef3cadb49a4a3ed16fb2bcdb0b43ddf29ce3d0c185831aeec0 +size 1138200 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 625876dbfa..968fa74bcc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9e225d6ee08b0ff0b765fba54bca0f6c7f3a1d4ea42ed90d3bfcedb258d3bd1d -size 1027000 +oid sha256:4da8e7e4d5ec95582b35534dc023bd63619ba8287366597a5b23647003df9680 +size 1027098 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 508c99fcf1..8b7a99233f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7eb80b38111b75fdb86ed3f1a6745d0a8734881b298a082092c2f89ab1d40550 -size 1053000 +oid sha256:d0e7d502cc999bab542f3f0d2f6bb0685a1357876e0cdecd1078a3db394d1096 +size 1053100 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d89e77ddcd..c7f8083194 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:55593bb3d9173eaa07a4a5cb9525b0a1ac223b98d43cbf7010a28371a72926a1 -size 1085854 +oid sha256:ffb6abdbf1adae947386bbe2e205d36aa761ec04d0d9c2311ae6f52bf83a1b13 +size 1085954 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 02969010d7..6b458c7bcc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d5143b0a3a2ac5577ecae3bdf9923ea033e3da2e89306e3f32ef731d8dec9197 -size 979046 +oid sha256:b458ba1b113b0146a2928f670cd3d40258342273737e192268e5a7952621f1a4 +size 979144 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 10f530d319..92efb1e42c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e5dac97d3cf0d2d60e15711205e28a47187238c3b8c4987c9308a50f2099cd09 -size 903528 +oid sha256:5ad1db4559fa39cb4f7e70476e90a2f5b6975f4cd8d62cdd666e3739adcf6d87 +size 904416 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4aafc5c638..01ae048496 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6179032e2dbb3fe8f3e4b3abd751be3ec91b77cb0deeead7a7bb0ce696914ac8 -size 1113280 +oid sha256:5b97419ca3cf1ab80ff5b10cbcfb998c065bdbeab3f01cf09a3d980e8643e1e3 +size 1114168 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 71d6c5f421..4fe0d24b6e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fff3bab8c1a89a15d0c7bb525ae96f30596b3d15f9ef44724cbe15d67cd3af11 -size 1002180 +oid sha256:b590f21c2954a3d092c7dfa66644510a1ab496357a5c6d55834033c60ed17b07 +size 1003068 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0d2120b7c0..82d1d19aea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f6a2cef4b365739181c3f8300bd6a2f7731c94fd86a1ffc9e782e90fe307067 -size 1071246 +oid sha256:fd9070598a3e963e3eeece48c684e8a1df5437646ae81fa7be061de2b1882776 +size 1071344 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 2bfe01a9c7..d0f178d2d5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9c229b58dcbc2d27edfe4353713941b9a21fc6640ab88d532e43a666ec2afb18 -size 965226 +oid sha256:90a496df39757912aa0bf374d2b5514a1897fa72e4c4523a6f5a2db05a944324 +size 965326 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index e2bd24aefe..241d516824 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a69ca31429129ed793b795614a57f6d51c6073cd980b423a4fa9db72c020b5ce -size 1128312 +oid sha256:e1abcb0b71127fd37412879b604566acc98b6101c8e4072c84f1619b162b8087 +size 1128412 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d71d658b55..9eb2423d3f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:55bb3184a34e12e664e13fc7c8e3d74b163eb6e5c74b2853427c4314d2a06975 -size 1030920 +oid sha256:a93a5d24a48d87dbceb45ca63fdde175533d0750d8cbaeb82592a24aa8322aef +size 1031020 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index a0645b77ab..0b0094fa04 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a97b72755d0afa672d4c80bb4291c62bb92d9da9251020688f5cf642ddaae0bf -size 1019382 +oid sha256:0fb52f75febe14353f899695b0d3949aed030851c29b7698c49e6e0c22cd0a71 +size 1019482 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index e1c8f82b31..45459332d2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8d5e6bfc5edac429db0236f7f10ab55dd10638f6fb3dccd99b813e0ec76105a2 -size 920462 +oid sha256:a9e949944504a53a01b5e5680124cd661a61490efd632d590d9836a7e28b8dcd +size 920560 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index 99f29ddbbe..5f5aa490af 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:21cb97556db7dd16450cb57cc49d85b53805b3b736be060b88ee1d5c42d5f9d9 -size 893880 +oid sha256:79c6fd220df6f69d7a12aa96f01bbaba82ac01959a347ec7aacef6a0c872d907 +size 893980 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 173951a4ee..f2b957fa19 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bfed223b491d340e6d1b856466766313120c78a52e0f6a8a7a2657d5a731968f -size 869896 +oid sha256:658aa6a914e68bb117f5656e773de48999f21258356bc7191e7a743df1f52725 +size 869996 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index d8ed186ce7..6ce5032e42 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6f9049fbe542592384f2db75f27403d2afb3c9bdc1c5cd306190b405f3d48e0 -size 1074636 +oid sha256:337b4472c748394067c8ce40715b9b7e031eba60d90f03b246e9ddf14b17d3b9 +size 1074734 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b65c102bd0..9c83dbc258 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e4cf9bd4a4cb72bf7096aedcb5b86eac91d00046339de153341e3621e749a404 -size 984990 +oid sha256:2893c0d79428d5e40baa36e9a51e2a2bda80e3b04b5644bcdcdb30994bc11f56 +size 985088 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index 4a93612e31..228785cdf4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:296270d0e61e3ecb13d6f330f0eb070acb8aeac64bd0cbe83625792a729815ed -size 970392 +oid sha256:b3cba46f12813b1e23299a42ca75b9d981a1069b0ec5bfda48992ccffebc086e +size 970492 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 8b84ace1f6..bac0488a02 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb942d8fbc0a4cc59ad38814a8294695493b2fb91468d671e07c616064e0ba01 -size 878970 +oid sha256:3fec4b5a70921d16a1161a5db031d5934c8d5ed81cc81dcf8954a86ace5a143e +size 879068 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5c6dafaffd..2f2c42675d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e68e16f7922b029c38548c019c82f3b752e0c6355b55eaa123c24ee0b59f47f -size 1076928 +oid sha256:5887e4149b984477d0273ec996b373807872c553be9e9bbbc1844a34f0c3f77c +size 1077076 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index eb8cc2d71d..15aaaf54d7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:56ff5118c81b5b876ae4d90e3656d6aa97c9762f088ce635d2e5ac3ca468def5 -size 994440 +oid sha256:d62fc8d8db375fcc6435df99d598f296982f499f7f8df1136d166c7c7dde00e9 +size 995328 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index e484c0fdc4..c4c905b926 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25cd21164db993d1434a22f14119c7e04bbb176bbe3e88d2c166a781b6b6b6b6 -size 960994 +oid sha256:8438fb0ed7822b4c4632487df8903a15fb1c07dc7dbdf23dac648644e4753eac +size 961092 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5442a12e42..47ae7c04ff 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15c47bc2ae096f0dd97669f4c8e4ab149e63a2f2e5625b296d45659820c5eb01 -size 1024730 +oid sha256:8190b910043c999f51eb8c75fe62d9e94eb318d5fca269af994f129ef9298ee4 +size 1024830 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 5e88da59e7..209a3f9534 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:88fba0a0a855c23821959757c7a4a9f33bcf4c31b829b3e26f4f0d49d861b984 -size 946536 +oid sha256:942fe50b0ced18e2cf0f84574ba9bbdddd3b3698df9b1472d8956229d1e8f1a0 +size 946634 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index f23199518f..c8fee65fc8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83a62ddc29074068577f4f3fd60a15c3a618ba4b2df8b11c531ef6d6a3148d4d -size 811472 +oid sha256:7f6c4fd52495896b307bd19160a81de73c93bd943e2414958f733f600536cb83 +size 811572 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4f8b4dcebc..cbb743ac3a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be82a9433b4eda90012e850b93fb688ac15e4a6e92e5f76218ec37dc638ac40d -size 1052896 +oid sha256:806f0a737f0170be2227942c8c1147efff2fceae27eb27236abe131781829a92 +size 1052994 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 0ce11c5ba3..3ddd9358e2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:96deddf1f7fd8d77d80effbe016d408c5e30bf00ef33051e167edf5f9e8af394 -size 970408 +oid sha256:1e7e3e1b0eebb47a283107770245bbb1424851f1d7366c08eadc854ff1db1365 +size 970508 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7685492b6c..c4879cacf2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c1f7378f9220b0dd67eb74f92f28c070b8db690ccf2c1210d4b57807a147579 -size 1010862 +oid sha256:495f1c1a3faa8e03152769ee5063ca305ac566a808021ae2d49206b1395f6761 +size 1010960 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 184b56f856..31d6e607fb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d760157d23eba4458ec4a3f3785a09affcb3bed7b43915ba52de4f4eddb8b4d0 -size 932666 +oid sha256:42216355fac33e4b18488523798748bef7e03f3e77ae6c0eb98bafbf51ae4a2a +size 932766 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index f4be5fe482..e6f42ab92f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:169c02d74ab50312d6571cc4064e3ea20b9b6c28a29f84e14af466826dbe666e -size 1066104 +oid sha256:153820293485284d0fa5c1b86289089c86a00e1f2dd1a564c168df7dff9bf560 +size 1066202 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a0f0e049f7..932ea70ee9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f7bff53f61fd6b9c50fa9175da2010caf82bd2fae7d2365f2ec3c5bb29651855 -size 970586 +oid sha256:f6662cad1b9750734bae3ead27f794159bdf5a4a75418361f54f156aedc81fa1 +size 970684 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index ee35ecdc7a..8efef515a8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9e576ebe6199c467519a2ba488f40cedf040b813a961b929760c2f1134d2ed25 -size 992002 +oid sha256:d5d579396053a9f627d67970e9ab288a3c6888598acd294f7762ddb4d8aa6a16 +size 992102 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index bf74aabe78..edacafa2fc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8694d22fa9088c6e3b72dde1d7312da631e533c644e1faa392e4af1a84d16be7 -size 888690 +oid sha256:65c1e2b1d16c86e1834fd23ee552f5745bb67cfa61ffee7d193bda8ea6f3ee60 +size 888790 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index 2f4151b73f..ae11913098 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:69a0b867b1c596fa2442da4d12de6dae944608553f1c0f7e7b7b669b5f629cb9 -size 833250 +oid sha256:57a15ba41558b3737e24f0a595235b68a4239d62511e0f3ac84252a33281a982 +size 833348 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index d103d6b012..1127015062 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fe88412195fd9a1f497f093f709ebbcb942846ea6156612f54bac012249742b3 -size 777100 +oid sha256:aa782e409adb578ba2541010d5469a34383627d4ec13b3e2dcdf5498c410c85e +size 777200 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 0c776c3a71..a6840f2139 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c9cfbc50e36907429fadc11834c2ba164dcc9655b76305dd827a819da059586 -size 1013610 +oid sha256:7c1a40f4ff30c2b94975928c138abfb09179cc5c9ba985c56a18a385b2042a4a +size 1013710 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 115177b7f3..0cb6d45991 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:66cf3f59fa73c7ac2823ddb02f8bd3087b273c3af7176e748a3738c1730fc9e7 -size 924654 +oid sha256:c11ace0da66a9dae72e22bc6f016c92a8357bae019e29b31eaa4a493c442c66a +size 924754 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index 72b7830336..d0e355720f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ce74752be8e4fb4ac54b18fe94575c4c741e7298e6add209d3b5ba9cfe872a1 -size 942618 +oid sha256:9899e758ed5805b59ce7eebe730e5849e2bd8e952f8331aed1406c20bb6a0e87 +size 942716 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index d5fe76fcea..f473632c44 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a13969233b0db64f46041b06e96a25420ad6e30efb51b691f98e65460f1a9ba9 -size 847200 +oid sha256:7e0333a7bb2ed87989bb48118507f829ce835591996da2214ab8a887ed9b5266 +size 847298 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7e8593a5ee..1da974a906 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:effd3af5ab1ec7cfe712cb9f0b01888b0e2e3dd288e458e289b7a51f4a04bfdf -size 1069232 +oid sha256:3b74ba326f0a9d46fe638c85e8e5ac82358e098ecc8456ca237f07daa7ec90ae +size 1070120 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 942f27c9af..d0740f3f5e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee5e986f28b08b150ca9e9d5879602a94326eeb30b650c802c5a18aeaaa67d0c -size 1031638 +oid sha256:b8802b2aac85ac375eb3fcf177e2f955bb8c1bc313d8ca226a4708ec6a07616f +size 1031736 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9b60b8ab2f..f78d2e2416 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:91a147dba14f39f7dacdaa5196f94ec2282adac6c6f1f3cad67601b1eb55212f -size 1046088 +oid sha256:38d4f16603a3ddad86d938efccd4b119709f71e8b7bbb3b9acf5b985f8d8c888 +size 1046186 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 40f7faaf6b..06e35c9bf7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:34983c85585d2b3bb2f92200736bd0866866e44fee2cc6f8687510dd27883c1b -size 1016930 +oid sha256:fa51b9680df467006cc922a4f7c460947b984752defb3bfe62884c3a85f22bda +size 1017028 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index af676abde9..ac5cb4f6de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f92b0fdb2c8b4c7cfa13edc157813f53c9eb93051f304f6085f28503b9c1b246 -size 1068718 +oid sha256:d607b10d44ee177440cec5da0c844e819015136e89f6f97d83702f7a6f0e0461 +size 1068816 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8fde43f701..83670b7382 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a8e39020d9982f89d853604c4e3875b6fd4b7fa1286359f8f8e584bcea638c6b -size 969254 +oid sha256:891b5fb2811bae5736387ad529cfa654c0a601f019c9b6f5bb79815f37e07289 +size 969352 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 4b8f2b0e00..840350382f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2aa284b3a233f374a5ce0556d2882c3644d4c731d16430a71caba64bf93f12da -size 1034824 +oid sha256:bb57e822f996b6945044a03560a0c7aee59e40fd57ff6599d0ecc65444aef783 +size 1034922 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4e73f37254..f7c89f4b42 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1d77d72de19bba5bd29da1a8a6efa2502ba97917b06327a942a8b4c3da7e9841 -size 938122 +oid sha256:674e307386aec489a8abf478bf4de14e2e408528bae65a76ffd281ad1ed85ca1 +size 938222 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b8fb73c8e0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf53f5ff1841eedba1df72045b580e138efe406c6be94e82fba22a229321111b +size 642128 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d9bfe4d63b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a85c259783cf4819845ba0e04e199b5f119cc50169e75139b123ec7f9ca330ff +size 559535 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..e2cf2ecd14 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e8e4cc8c28c4e2b56ff0747a2a85e299fd53b83ddb4d79e5cde7d9089dc6426 +size 665486 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a409d2d622 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:072a47a718e2a146b2ed20ef1e54062ce3682fd24e175ae5496953ecd1844218 +size 580625 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..21c5804f8e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:791e023ab47fc0e6650d75326555403cf4ee25bec476e302b83658b4eb374e1b +size 640596 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0a51fb4c21 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8c2c07aae6450da4954642bad57ed7846da643d41c37c97cb40caf8a0cd2232 +size 559879 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..93f7d3b74b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50b829bd640b6b2af7a6d9f81467bd66254615f934880c9753e4fc36b3b13717 +size 664054 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..95578f4724 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b64bcd7f6be249c21312bcc430777a8de667a92fbe4496f4bae399121262cb3d +size 581411 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f0eedb4497 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c986a1e90eca11516119469c2eb0de6ff43d366da394ad5d70ebad4f5a789981 +size 708760 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..f9a58cb62b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e28e442908443846ab70f75f6da817b9cfc73afe28d9af7ae0c9802e1273d661 +size 625774 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..e987350c65 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b504dc9ef8c42b33afad23bd7b0292dbc3486dd397f1d99f3e95f1fe4733ed3d +size 731922 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5b6df3383d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd510b7151c20105d7ed0711a36a3a0ddd8197bf1a75441bb44be4e23a111b9f +size 649576 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..94df9ab10d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e3af280bb9267b6b4adc919d9de6d310a7f09b8e573d4106869d9fede49595d +size 727822 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..18b1a2af1e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4590c35a12701bdc531ce99231b288f2fe1e97c1a7c518489f5ce854c54fa3b +size 640346 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a34fa15413 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:606443ab085faaac24783a8635320e295310cf2259e4e953570e172d00ea7dec +size 750736 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..8b9481c05e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd5caf040429b2d2b98b7b4d990fe12b85229cd6ca77f6c13d06d1784783415f +size 660942 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..630b90909e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f09b6e69951d44f881a627ff92ecdd28f1be9f6f5075cc3588b6b0b809728da +size 720668 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..fbd2d1c013 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7392ccbab7cd524043c5453fd2b0ef222a4c0a3dd6fa21c1e2599d9efa65654 +size 633192 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..11152d3fde --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:353cf11ce062672e9b643e80484a2ac8677b24287ccf91e04dc7e0402d9fc410 +size 742794 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0d6f2f1f7d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c6ee347b2ea3131c726f15536614384c1051d4e5b95933d9d5555bf08f7e083 +size 653788 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index ac075fc6f6..5a2312c4f7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab993ae762c971811a21f3be875a813fdf04939211d0ebbe48e97ea108b487bd -size 743888 +oid sha256:1bbdd96620f573ca53ac4080a4922243424ef658ad5216d651e2b7d36b3f754f +size 743986 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index dfbcfc816b..649d3ebc98 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7474604965beff9b3071ebb146ecdfcd61867635f7944a7b66c83274c2142abb -size 767592 +oid sha256:e3000a48268a807a700566a1a6cc26249774acf5ed09983cda6edf4272f15e84 +size 767690 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 38267ce6e4..16c89560a8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9cf34a9d22ec2d49963adad174f83ff322ea72e78b03d859d7b4585f307bf0ba -size 743190 +oid sha256:7873f4f8faa5d40842efb569989e5d2ae6919a92bbe46d686eea2bae3cd24050 +size 743290 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 6d66c9bab2..5736a2c250 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ceeb0defa4398388d8ab4be32665327508dc5b7956298d24c8df70626d86107 -size 767734 +oid sha256:1e0dc9b4ff3bea6b98cfba164d86d3ba823cd03f378ff728fac587572cac80ec +size 767832 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index f16e92b56b..bc81dc050a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:50f4655953371cb281e721e627eb904183ed21b4cb260c06a76c62c2caf4285c -size 715204 +oid sha256:b4c9f519cb86f68f5f38a56ce26d30893708666215ae7daed69edb204fdda1bd +size 715304 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 5543b146d2..de93654522 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a0211c7d17758f8b6c79b407d3170171e58cc8dd08d93e0b2d19436a9c9f2bd -size 631428 +oid sha256:5b32bdb9fee51594a64a5720ec70562d5a333b9ec828c7318fb88161b09a2d11 +size 631528 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index cbd5ae769c..7c0a43c524 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5eae395d3262587c0f66f4b494a6e1ef4a9b7f3227af9797acfde31e4e78d9bf -size 741524 +oid sha256:2fbce0e2e3253802d1fc7e0456698bbb82e2d5028cfb7418d51a3805076263ac +size 741622 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 515ae6e0be..3d3696e7ec 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c90dc668fdf2b16948ff5275c09a0405b1be5d1538f059dc60376814a774ce25 -size 655132 +oid sha256:5c685ed45bb192a9d7294f456a29ead96bf088d99a8cef6d96b9c1eb3e8e84bc +size 655232 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 34e4a26ef6..45a44aa65e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e3bf434ac8bc9405f9a8562cb59d45147687cbf8999ac0563c65be5d17adbacb -size 736734 +oid sha256:ba1f6991dc6c7f3237528a8d787d0a2028cc7deb5432a4491b4b1f948299188a +size 736832 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index a647338377..0ac21c0951 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f3e19f24d36439513798bdba3ee5da4285ad8e35aadb2881269d7c9f9239a84 -size 760438 +oid sha256:d77e83b7e6025b1a417a3edd21cbca709f8e0475488859d8c7b3a2e7a6c5ad92 +size 760536 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index a494d69879..52ed02b04b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0cd6feef8e279fd17b41f97737d21edb6b554eaf074b5d238a5321f53e5dddd6 -size 736038 +oid sha256:96ef8b460512dd42e32e82fa8a1be7435f6c53d260195169db94acb7f188e699 +size 736136 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 0aa19d1780..641b0fee93 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a4cf215b87aac446410af3e73f5264ec820369dadfac6967b521300da3ef8114 -size 760580 +oid sha256:3d1072c5c8652191ee85ee07bdfbd797f4aa1b8b12649ac6d64423806c91f970 +size 760678 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index f587b35b67..65b7d36254 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8deebe5038e2724ed0063108bfc8c44d9fa1df06f04a5de85e42afb375ca2b60 -size 708052 +oid sha256:b7c23f26d5bf84857ba3c8069550d99eaa7e00d9620901a6e135e49638e1cf84 +size 708150 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index f04e06bfee..9e1275ed00 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:60db54c124cfe55e8657a6fc0b3b6dffec357675148805e49ac6dddfb898879e -size 624276 +oid sha256:3785aeb38459a2ae2455d0495de11a606fd0cd68a8b9c9de22dccf439a28f22e +size 624374 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 8b2fbfb01b..ac767ac31b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:07b516b5c04777863214dc041deb6f0eec96f2a440013c6a540edc4f8a91ebdb -size 734370 +oid sha256:3daa5c4e068e3a50692998c12110744163bb244d2b504b0917897d2725683c20 +size 734468 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index a841212b6b..2b29f1ae7c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c4917dee904ec502ac1bd747d8aea718010f870aafad0367b99662c5c8076af -size 647980 +oid sha256:cb3facf25055ad72f71ab6ee5d543e7a14640ef8a4e6641a613c1875646a8856 +size 648078 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6514dd30c5..ee077f55ee 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b832240f1c9822daadb51e626e720d418538bde42c0ca93b95947703ea0ea0b2 -size 628788 +oid sha256:d7f3adf983e3c5fa8852a35ef0f5d1c45cfa32a33c17e404689b6197bc92972c +size 628936 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6a4ef6aaf1..796ed1dc28 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b4768f470f4bb0719c19ae7300aa87544105206b20333e267f38e0dd192bd1d9 -size 585865 +oid sha256:3860b72f6bb98dcdd9c4aaae322fd4269e3798375fd8d1222b86ffce2ea22870 +size 586753 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 72f557f619..ef2abfad43 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8276e135747d6fb5b01f72706d73b46c931719cd5730464c7b62d9b47c8f7345 -size 550519 +oid sha256:25335d968db92c0fa580269ed431d54894b937cd167b9959807fda2080bbb251 +size 550619 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index dc109254e8..6755a891ea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd740be0ea799d5f1b7937739412196a00e6977a53c2389927e8077f4ae2e35d -size 512629 +oid sha256:7b306426a88c200af3e9e9b4b1e65c80745169e5e8ab9faf253d23cee60da5f6 +size 512729 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6034ff366d..f0d1da4370 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a48d116e9f95a45d502d5ec93acec48159a4172c080b58c953fad9163024df8 -size 617731 +oid sha256:2ef00e8d10d25f8d9ca9d4c3f94447d6a1c69d06ec197dfd4c9d63ef012cac1d +size 617830 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c1b5b883db..7a46fb73f8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8c37ceed553554cfef3114282d46ba99b9d62781c75dbbbfd3a8c79b896a88e1 -size 580335 +oid sha256:87c187c26edb1aa54277ba88e9ede37a7766deba897796ba69430b2d27c82f92 +size 580433 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index ade46174a8..83b6897683 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b04523fb46db6e46935c85fe4e31d0355d0bbf9aaf2af7c90bd1d87400b978df -size 537243 +oid sha256:363b924b9adeff4aaf6c394f4b857039438c6f12f1daf58ef896e7d632e8338e +size 537341 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2bc41ecc06..365d63ca59 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b527d45b41ba28fd0f4034c132b9cbb44a770af0ea144a77c52152f30c3cea30 -size 504435 +oid sha256:e354e1902d25fd741ae179e65c0f0e32be4f43881895a75e58bec60291c90600 +size 504533 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ef9c791307 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a2b6d087d9a48aa92af4c3f864dd1991a6ba270c36e3b60d8f20c9cce6a48a9 +size 723972 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..c21f879f44 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04e492af079fbb35d3f0944c3726b6e9372485756dccc072f32126ebd4c72570 +size 640986 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 6c78ee0165..b33e64733f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:052a6d929f3887b8629607b03d06a1df074b36def44b72b8293532ef81a8c6ed -size 620776 +oid sha256:cac1719977fc9f092b9c13cde0dc8e63d0d93d34ef26f8852a58aa30ba6cc848 +size 621664 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 621e169d30..ae0a21795b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:953e436a81d2ced0a873578c8a60f7f13f246a2af7634fb52d4b45a31f0e5b17 -size 529107 +oid sha256:5a5ea7fa65adecf4e93c6f8330efdac005cd66e503e0e537b838d8387ebf13e3 +size 529205 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 56612d3684..5203e6b9c2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:091e5dbe02a4f5454e15891cd282115a419282b905548ce11ee529cc85ea8708 -size 577311 +oid sha256:fe31fde23c455860fc53d0a4f920ac691654fdbe74369b5677345a5a683f2598 +size 577409 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9b02172a32..2c5557db76 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02a734274a8f4b9906d41e4b8e2955a582d7a54ecb6c484bc256206172f85138 -size 496693 +oid sha256:e8247894bc232ebd094c6a00d720c85b6b4980cfae33dc6e8352e7baf1e2a679 +size 496791 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..d59879a726 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c39bc706a492889c876679cfda7b77d8e654a86cd2e77556e596086031117987 +size 747034 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e0905e4804 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94d809b6504d916084abbd37b093c8f868c61a62cf9e4ecdcde68a423920193a +size 661630 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 1bc1aaf884..56451c8105 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1866a0d216e0c508b08a501eef65632b7deacb8fac7134d4f96e6f17a642e96a -size 557307 +oid sha256:2764ae021eb6e16b19b2fc334f2576075d68dda6b25d046fe58f25c4f0e1080c +size 557407 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0bb697c9a9..3d41014fa1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c332309e8fbc0c83fd5627ebca1c4cfaaf7d3d0589a7e1795d454286b07c0e35 -size 454439 +oid sha256:966fa2a9a4826002c99ad7b02e32d0e3d13ccc4f6da587e91eca447917468096 +size 454539 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index d48cd1a04f..0ed68bbfd3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f88b9ece45f6d802df2f54b3ce536acac80e335abddf7d91d38993423116d6ce -size 528199 +oid sha256:89d26435329a458061da0d57f47b532ba3d0112bf5be4c880fe02aea55459a39 +size 528297 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index d3e863e681..e776fc7a87 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b75a19c4393f546e3f528d55aab89c0e5414c4fed832f3084492447935dfa64a -size 427403 +oid sha256:a45fd4e42c61295da56d4edcea75a96ffdd50b98d287170b6ade1d82c308bd42 +size 427501 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6ecf744fcd..54dcb9cab5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3ef8dd6ffbdc8186a5afec118674b433be3d0ec28463f789930ed207f2450a52 -size 622424 +oid sha256:8bf9736ec0e00eb2457eae514a0e123179e5b4021c4c24052cff6aae8439c6c9 +size 622524 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1f5fa61aed..69eb787354 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bc10b89fcdf6c022e8d6b48c7aaad5b943c2d8a5846be4213dda7f08d71e2031 -size 578713 +oid sha256:675a215e9dbfa5a67d447fd30ca68713b5942b7820a8ec5ab6950cb3981173c3 +size 578811 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1f18ed737e..bcab176dd3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:39235789744522723d361f402a33bdb2af8a85ff8e133ee5e537b40f3d8bccf8 -size 546573 +oid sha256:76f5631bd0f6ecdf04ccdb2d6c8e670f19c940dfbb3a69411e71902f8ba81c38 +size 546671 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 20940a9a3c..043a88ded7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6afd445ab1e85d050750a6ee7717af8fbf313e6ab362dd2ce8f6b949307ff66d -size 509423 +oid sha256:5377355d31bcb1570084a72824f123462217c8bb4ab1c4533e2a76549bedb26b +size 509521 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2f4aacab89..2ba858c7f6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d3b3fb13190275214f2086cb25042710f1c8216e60659e0a4e7b366e824a243e -size 610577 +oid sha256:68d600f455662adda64a1918264ab5232bc814090666557753c2bdc447da3748 +size 610677 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 78d429f5d1..968f07b72a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9477f8e96c73205eeadbeeb8483a9b90e3ec0b61cfa54525418db3955be4fb92 -size 573181 +oid sha256:63ca9ac9a30d0ed596f65b62443a379d0a0fb68a09db6cf86b9d6477d37007d6 +size 573279 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4266726ec5..b580c737fc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:514d3c73cc56adf3a71b56238f45233c5154e5b235451df4cf130e647a293000 -size 533297 +oid sha256:403f13e038612c0050c4d1335d4203451e23bddda8cc58f6d1c5512bcac77d0c +size 533395 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index e60a3260f7..401046cb15 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3c80d16e058324ab9ed6ab4efafea922656ead4006c9d4f4027cc7d4a50a8c83 -size 500439 +oid sha256:d07bdde43c55ba663dec2cb5e5edd6521e7dc822dc285857996c7203b77c5f8a +size 501327 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..dff25d426f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f0ef444aa18ffdf89274ae52ba8760867904cd6ac94015e9fc710f1c44ff800 +size 716770 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0fc21787bc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb6f0af30a53cfb7fba25b47338bf546999a7c69085e6fa1ecca8db50da1fcb9 +size 633832 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 602615f10f..68c2987c64 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fea4195281f451094372ab9230e85fed990c6e0956dba2b375be7e5897a42e77 -size 613623 +oid sha256:82b1b084217c670101d66629c814947192aa4c40ff80736cd853b82bd399f79f +size 614511 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8a142340db..8315d8dbf0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:456aa934623cbecb08923d50075c828714a101f03434270d63d5803c328c880b -size 522003 +oid sha256:38196256aba8bb9ed816469ab22bbd2694f526347f86deaf884f09893ffc1b44 +size 522101 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 2ae3f5e1bf..835bbf36d4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83ab90531ba7315255a79ff1f555de31524570a7cbaa874d302fb7877bde3581 -size 570059 +oid sha256:12ae22025abcba74f6c6be0623e0931bdc2b73e8c891d49ba50cc14faacd974d +size 570157 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 765aaf484b..fa1a31a825 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1f652b50924fb57508e4b43f2ff81fa7c76afc4061c8bb277552ba3d518179c -size 489539 +oid sha256:bf997f5da4a4ebe741828a25140df873a92dcf6d866deac5eea1fcfff68c37c6 +size 489637 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..07c66dc0a9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27c889441bb0a2434517f4def63b6385bc9eee56ea751c4fc8e86ea18cbc5d68 +size 739980 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..33566e3b2e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46d4c013f4c7d2ff2a4e93c74169103882f4113b49398b84bf8d61acfee72ef8 +size 654478 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 5f750edd81..fe26c30995 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2fa195276bba39a92ac5196446fdb46bdf2b67bb4cd6010e4751a1d4c395f849 -size 554101 +oid sha256:2b7d30e4a635089fa2af088442067e149c955f8c99a7582290fa7c9106a4d39d +size 554199 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 28a8778bc8..e317b178c6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0aeca8f4b158689dba54f49f835f59ae6533beaa7f5708006a65aca562389070 -size 451283 +oid sha256:a617a26a41abed6b92a5397e7ab9a29228e3baa5fac4f86865a47a98328162bc +size 451381 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 247fc45b4b..1783b0340c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae119952da18419f357eb0ee1f2cc92137d630205d8cc1b6e8339a904f6f424f -size 524993 +oid sha256:541635fa7d997ec8b5904eacfeb80521ed4c734a04d8c9ef16879fb6d2368569 +size 525091 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 424a59f53e..c7611f82dc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3fbc05884af8b92266bd5fdf621fa136aab0f803dc6b07f89381a8b982bbd166 -size 423457 +oid sha256:5faeaa8899c0642bfc4523112d16c5b7d1378222ad7a6675c87321138a2b9dbd +size 424345 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b80efdccf4..88acdd9df8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e4920b104f5c7a25b9cdbe4077b9dca07e37581a7d152ff93a8192fca328954a -size 647520 +oid sha256:9a4a7d268c8c85bc3ba54479494b2bc4fe9433223a5ad5fe30caa2c6eae24ec2 +size 647618 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4a857d967e..858838958b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed411c30053791af49792e1a8e7f26720381fc2c7ebf83eef854a968c0874e30 -size 592659 +oid sha256:2d3001e1b41ad290f635f5fadc73c971190fe34b356e217682ee2e2f77a8ba4b +size 592807 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index e4f78374e3..1bb043b085 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c0d745563a06aec72d617ace2861b48128658b08b9ca42b5ddaa398dc26845b -size 572013 +oid sha256:338262fec7fbe3aed7e691df1be54aed80ed6cd8a0eadcb2792e5f5f82dee4d5 +size 572113 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1f57e56f70..1f0c9571d1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a108e55040c68d91f2badcb8f7d2d68231a24f80cb4c30b01a59628fbc079b9 -size 533335 +oid sha256:5f83e187892c6d5037428bda2fb6742c13412a189bbce91b35cc60572e680b57 +size 533433 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index aae6e7c19d..d140ced861 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4eda41beb1e683cb53558ad0f08615d402232f269af96f211cb7b7b3d431324c -size 637204 +oid sha256:3bc1d52d55a21b98691697d4cf97a8296d01a208c52933dc2462de9d1a2e22eb +size 637302 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 29d20d34e9..16fbeabb75 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c65e78488666e4a8b27a7109e50bf938dc6a0f6327b03031468d4fdd40a9f371 -size 599213 +oid sha256:4ed2c80442c18091ff0a9daf6b8a867797878b997ae8db507b152febefdb6035 +size 599313 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 160a89a53b..525263eabb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fd475a96f9e55cce5695dffe925580946aca599657c41c2d1be9e582ba950a11 -size 557947 +oid sha256:e7ec2dfb1f5db23c2add6a47fcaae6596fe0b04d7ad7b2912016be1dba8ffa44 +size 558047 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 11fc1fb14c..409c90ab70 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:71a4f38723bc06c5adde22dfe4ac29bcad21bdaeb69840a143b80a09a29e3fe2 -size 525139 +oid sha256:93e1cec68085a5a56b4a102cc9930f59e6914189fd24dfce06a9318e2cad02ff +size 526027 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..d3a2170502 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a09c5dc2375c9d54e2ce1f36d354bf7925ca781a3f2fa0684f77be38667a2da8 +size 796032 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..020855699d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a97b66df489e308fe11888f93d0dc9ad4392695556fd6919105ab65e83267d4 +size 706436 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 660cb35371..7d03f1787e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:16aec4240b4b7253788c397afd199f944fe2873680d5bb4551cf305a91db82a2 -size 639558 +oid sha256:c67eea79b15e74100d5880ba423c0e5b3027a9c3d3ca9c660adf104e29c23558 +size 639656 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ac233599a1..c32578d7f9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2943128c896ed50f4d73df9e2a0f52fb8b1e9ac096f6a7fef5be474cbd4c4450 -size 540783 +oid sha256:89e9037fa8a1e81296ac166114610df5d689744a25f16b8cfd9f7acc9a4e5f16 +size 540881 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 74f0a3d4ab..5afefbdc24 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b157605c533d45480b8c050b5b7a5f7213905c2467d4fe220693c36a942a0fe3 -size 601963 +oid sha256:a6990928957dd4bde176d8c15748305c87fc3b6035164792e2fc8b7721fd27c5 +size 602061 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3b5d94441e..5176ea52db 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b73fe306bf1a08938176a06c8a1e0aa91d11b9e8ac03be0ab45e8b6f079ab756 -size 508369 +oid sha256:8fefe70c373d187d5e80babb150127047270ac0578f98fe5f6f1857bba6ffd40 +size 508467 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..d35c71a697 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cd3cf45ad53ca9f963ff02ae34cd6e7afde1de9726e9c928971d26b9859ffeb +size 820280 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5fd6f5897d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:520cfa9e2966079c7a493964730d274a0c09acf433dea14600d13b916d72e3a6 +size 728560 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index e0d4e9b2b8..01efdb6740 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:61d73af545b9c3353b319964b6b1aedbcbfd422fbe3b324428e6b550548e87fc -size 578407 +oid sha256:7eaae85393254d1c6b1f9359e4ec8eb902e6c73d0b14c14886e0e3f57105d8ed +size 578505 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index ed1ec7acb8..5e4a82f4fd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1050362d25ccb95f5a8e0aa516f525f62bb3efaa606d6c30240040a5b4801d3f -size 473417 +oid sha256:5a1f8c24498d51dc841cee882f05bfc24e72fe79c5d9a3a0ece624809b83fed6 +size 473517 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 3d2a5ecdd8..0002fbc76a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:858520e3157e59cac827c8864129d11e618f346a639c4b35f1dc86b310111062 -size 547177 +oid sha256:3af37cde8c5e7182f8f1b643c6eacd51741bf793a24b730b5439fb3b77fda3b4 +size 547275 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8973dea7e6..3f90cc1b60 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d10b892e066e0837d451c91cdce28a0a5fede70ff552fdc5b7dd53d0b0d1e3a5 -size 445591 +oid sha256:78439bf5f28b671e45b21ea88772d136ab23a61a95661f7d8cbb7102d036888b +size 445691 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 626ef24e04..0240b6531e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9e6c07a9d779907cb9d8c752fbbe2e0201a5cd65e5e4e278cde1882364dc8ff7 -size 640366 +oid sha256:916dbfdd646dd0d3520656e4a0f3db3aa175a7fcb7718802107d6a52658068ae +size 640466 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6fecc8fc43..adccd65c70 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cc0533e6ddce908103f86f8167aca12922affb06000b764da477765adb6453d4 -size 585555 +oid sha256:7a3d3f3a7ea12185aeffd97ca17355df3dc6dbeef1beb391fb9aafad94dd3a47 +size 585653 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index b6bf2e7e92..9a064a7915 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:919a7c7625db9fbce7e5fcfc404b1ee5eb71296bf330f54697f99a1e95726953 -size 568067 +oid sha256:fb7f961fdb03b48e84f65a6a9f8c82d8c06b62b878fda8b54174830802ef513e +size 568165 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index a997e9934e..1708d4a7da 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3ef54d22ebf556e93726f59a1932b990fd5ece2e88140b4827aa10311f96cdd0 -size 529339 +oid sha256:982cf567ca8e65e1db501ecb2b007f90a4a98dd33d2f8cf14dadac0b0dd3d786 +size 530227 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1945920aff..c04e29c9fc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0a082d4b953c8fd0193a0be8662866b9f482d57434cca38897f9e45b8a6cf0a -size 630050 +oid sha256:1087b5ed433e8ab0dcc3c59ea439e725c8ab3c752adf6b0d187e86f55fc3af10 +size 630148 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c32241996a..22811e0b33 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ca51dd076747498e4522cd608a862f840fcdc5465ba035da55a10a012b49d45c -size 592061 +oid sha256:54d14e0c6e1f69829921aa0894c3a88b01b2680ba00b1d9161909e88db96a847 +size 592159 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index d9db0258a7..6bdb09a002 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1ae8ce16ba5f0c9203a0323dcfff8633ebc69bbf0242d6b84e3019ba85f4424f -size 554791 +oid sha256:3d1fd49c618716f13df1017124e011a7542cdeb1db2308da06bda6ab002c0e19 +size 554889 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 376161c6c2..1b7a10552d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fa0f84c46addc815e7e99b81fda2c6a5e5ee4730f7f7140cffad4def6857e805 -size 521933 +oid sha256:173df3769bf21cd28d644ddb62b7973ac309d157b7620bd435cdbae1b877de40 +size 522031 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..4ec88a0cc2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3faaba1e91354cac34bf91f72020983c213df899c461a5146b2550205b73adfd +size 788880 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..947e078d79 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06959eee190ffbf2eb9decec870759fd7d651ed64ba4e1299bd771f8743e53cc +size 699282 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index a710e34689..05ee57141a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab846f3af5002f31f78a682f387143126a050f30f0c7fd35e328c41e30126241 -size 632404 +oid sha256:025500e3706a765ec1633bd0d13621b7a44c44ab3334963dc7d6cdd176a6132d +size 632502 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3ed7e06386..243cbed84b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ff236c218555ae06fb9a826129bf0e560e66d937f85c942a5d4ca4dd44632ca -size 533629 +oid sha256:09ffbf3e672e93e3bdfada396dba38119a1d6dfaabc8bc7ea8ecaec20f1aab65 +size 533729 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index daec50c872..dc18e95921 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a2bf06a2efc5f125187dca8f288a4eac0f942362e2afaad3cf8bdd5686fa1643 -size 594809 +oid sha256:968e72241ac3b829279478b0374c8f43e61a52e71bbd8379b17c62250e645316 +size 594907 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e422d5aa21..f2fda1af5c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8139bfebf4b9ab8f06de005a56504d3f51f2d44eb79ac5e35e5b0c7424e97c42 -size 501215 +oid sha256:57da4132d2c917568a2bed3cec9caddbeb9249babf03d33f1892bd29f61c9c42 +size 501315 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..8d16f450dc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd1cdaf81b3da73366110e094e7ddf70d238bd9a8aadb4de0752bb065d536026 +size 813176 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..8bccc7e6ce --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abd6dc3c8b3d45c1f23d3ccdce35f492740e654232a93b1ec76260c9a7193cd2 +size 721408 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index aa7947b772..db4873ff1b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ceb4b7925afd9b09bb7b70562a189cb48d558fcb13b7979f2dda399be456650e -size 574411 +oid sha256:b8e088048a71c2ce75bc2017c555f5282740d0f3708a91a74e779884ed049e76 +size 574509 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index dc3a688070..877e30354b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2feaaff0a69af445b78e20f4d2e990261745aa9af3dc77506233d6e4c79393f7 -size 469421 +oid sha256:02aa43a40b190347a11909e1ca9922a3d40c966e101c9338e9fefec2b4ec80f8 +size 469521 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 9d8bd342c0..550da4b3db 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3eb75e3395f70170dea605065c818b615a004c1067b0892c3a75f8cf3315dc10 -size 544019 +oid sha256:97ed8a2002f196a317b65aa4592b496fda887265f613102abcdc97ed968ce34d +size 544119 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1a4b4d9c6b..3b8bd60361 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c61bfe46bacd78c09f45c35fac709141b9b6d3b7722d5a815e2583b98bf3b3e5 -size 441595 +oid sha256:25cb7c2f37b7261659f5f528f48bc99cfa1b621bc9656b2e7db67525d1a35b21 +size 441695 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..2c7eb851e7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:952dbd3321764fb1a05a75b22c108cf47719a4d18b72849f3ed6a42ef8c3d87d +size 612971 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7af8101362 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34221a0a6d9182f6575981ad21f523f1fdabebc8250c56dec2026847c57f83b2 +size 525495 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..d76b203369 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0559e7cfe0d197087951cfafe12281420ed63c53e0567554bb697350c366b343 +size 612723 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5faa804ac7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bef04d078ac5d046bb57473eb6a080fedc45b2d6c6c2b3d5a12921bb681d7c3c +size 528799 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..2419ec8882 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0714c2430741f595ab9042b00ff2c82e344afa07933bbcc3edaa45528ef930fc +size 680098 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..af71081f69 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:361f19e8861844b7a6f6ce4a832dd6a543a5416098d58452ab3553b820a95f84 +size 591979 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..674b9c868e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57354974ca5d3e2d8cc521c1ec6c927c57bc5478e3f9b47150c537b2ab45ad9f +size 701232 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..24430a7969 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed3b9f8f795615e74f0201cc4ade9c8eabf7e0258036613e93b72353be0b291f +size 615087 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..4d84e7a26d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6dadbde6c8b781727aed7bb1173a35616940a42a994d42564f8b9124df503c9 +size 687714 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..285438d5a7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c455959a6972a18263d37a22eba879d2029b54c3b60ff4a0fc5697ba508a035 +size 601569 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2140127e50..ed0165768c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:21e953b6d4efcbe3c7864a78fc38f993d7edc1014bf3621a8d39414b8fd2d22a -size 726568 +oid sha256:8052b95cb8deece86290242c21585bdb27ae55dbc61fc0bb73578cc851409f81 +size 726666 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index dbde2ca6c4..b2a37b16d4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e781a9cdc43fab66a1421f509b8a2cb17563d8da86b17115d06c5e5228a4885b -size 688628 +oid sha256:0dc6e15739302a95dd90990dbaa45989bfa29af202e05071d13aae3153f98a99 +size 688726 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2e069854c2..3e75b7a58e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f9e1b94b34d6ff1849e683f00f1be4856513d09abba76782b5d85a6ef1fb3d5a -size 708998 +oid sha256:aa7461106cb963208366db9e9cb03b10f58a26057ea2f074d12cdf3c9051732e +size 709886 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6ecbc79419..d2db83b8e4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09c9c971f8f30513a5365e81bbffff36c41387c2950096bbaa5c30cff48d2bbd -size 678360 +oid sha256:b41dc79f21f00decfd44035c426da1b7639ae1bd8692aca6fb18b21dbca523bb +size 678460 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..111e64ef9f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1c90576cda4cf62a9e084307a95443d8fc17d7a15ab4c0ef59d9517317da212 +size 701870 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..402d6e6b0f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3921c8623334760e24e5ab77b65f1914cf279ab93d592de1511066f35990eb53 +size 618982 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 35fb9cd9f7..da258c4d2b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c7f78f893481015582fdbe4a81a019e6272bd46a99d5261956022e6f0c6e6be9 -size 718456 +oid sha256:0c98aa544f0766a9398ade4f86691c9148a8e3afa604d0ebbcfde6ce6b8cea99 +size 718556 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f9cc1faa00..cfe792d8e6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d5282289509bad59a09083e008a6fd3f34129719ffae258ba5f41f61cfa957f1 -size 624666 +oid sha256:579885108d578f2f4a05cadb7a36f4e6af015d1360079187d21d048016820050 +size 624764 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 771e302210..7b098eceaa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6820d577fc3c52ece14f0bb8f4362d6a1c27ab8fa67374b24a422e655cdbde4b -size 670206 +oid sha256:af220b7c045d8d583cfda25e63bd7bbb8e4f24648c164d00bc6a2d4888e78567 +size 670306 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f5138ecc71..a38bf60021 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b27cb36cdfe60b9b164a4e05e65d5e8afc7375b3d2621850128ef2e3f79c1969 -size 590229 +oid sha256:0b9fdb20faa8c062dc847ac6746f66a823956b294050dfe55c4982d119eb9f65 +size 590327 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b93bfd16c2..cef3ac67a7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4f75bb3b7ee8bfd59e53a704921ef4c42b5011f5cb51b2643c1fea7fdcbf9fb9 -size 712260 +oid sha256:a5fa5922615b90055fd288819cbfa373b0e83df7409637730e8634efe838ffdf +size 713148 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c2d4898a19..d820389cf4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a34e0421ddf3ba88cdadef303a5fb270368654d856effc5bd333011c79bdebf -size 675160 +oid sha256:5d9759865c587b7092b1de6a0a2cbbdfdd86e303bbc052eca2ee1f318ce9fe10 +size 675258 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 23f8fbd1eb..519eaac3d4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:479c5913fd9be7798acb53f41981414018ae8d56770592b02da59ff8b37e3f3e -size 695482 +oid sha256:72e0b7295d7c65e120c2674681267db3d78ab22383b12c35426981f9c900c806 +size 695580 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d3798ce5f1..7beaa8e456 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:706ef087dca671617eb0e239048466a72e7ea017322328369fbf80c8a36f2329 -size 664844 +oid sha256:f7eb087a816a7855769f413bdcd050eb6a8be7ce935cfeeab5882716c8204710 +size 664942 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a2308d680a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1876d8ab1c967a1961c616def61c5141cea50091d94a97475bcce8bcab571fb0 +size 688354 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..f388809a4a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:305f8a118e97408cc6bacb1d4c407734dafea398b6be3d8709130e26cf310fd2 +size 605465 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index b2318a0963..0b553b9797 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cf8dbda64922377fa2e3200aa3da6d865f097b7379d2859352aacc5c2253a697 -size 704988 +oid sha256:d1426b1c7586196a1c59d550bc3c8f4827031558cfef821963f329540edf0faa +size 705088 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ec9cbdc2ed..6e1761bff4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e6b5f4f45eb0772fc6b14ab048b1a135861f1a59ec7bc500952284c6c45186ee -size 611197 +oid sha256:ac5ebcecad3ced83eaeb11a449da70ee53c0693397cff2b47e138e48066155a3 +size 611295 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 681c691777..de2801fca5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:514bacb8cc822b95197390ad5a120ef33cf8ba21b57121c5743e76115fab33b6 -size 656788 +oid sha256:e6658723311b1ce7ac27528a04f4275b7ed4cf5d4a1a9c8759010fc54f126bce +size 656886 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2e87a53a04..3ec1f47a57 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee5eedc434df550d3595cec8047e6d24d9d945ca61a2d8ccf32bb4230e869bc4 -size 575971 +oid sha256:412c5dc0d14793d7549ecce49b2b080f250671f3252fec9ca03ee80012b9ed8b +size 576859 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3bb9ea738f..1b3cff46dc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:16a13adf7903943cad0a7157dfe353bbf710891a30e951467af6a01c29dba668 -size 745398 +oid sha256:9a6bf7aded2ea6ead6c9a2fe5203cfc54bc0b429222eb2612180db7fc21f0b4f +size 746286 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1d981cdbb1..cefb0b3fdf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f56d748be3fdc36d92ba706270466096034a379fb25367d6fcabd868d302aa2c -size 707408 +oid sha256:12e4eeccbb659cf247d631b6c3b2d6b21609e97e99a8d20ed0e6dfc0db8f825f +size 707508 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 808cf250a6..f8c7294696 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:650a9e3241c710744370509b4a8b40f053c1074a33f2778579398248b48b2afb -size 729408 +oid sha256:4e734ba966b4026ba7b0de6c035b78a1b7cda2e2a7de02cfcf52413235f44300 +size 729506 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f75324ef20..639f24f7ab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aac0011a0dd6181101ee08faa26d2ec3180a3c57611d91aec8d148de7523e3f4 -size 697882 +oid sha256:d1c5d074d4b0bab063eb61db47d6a9763f0f3931a13c2b8903cd6638a0b1560f +size 697980 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a95d30bf1e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec9b0ee86ab9009bb37f76d592bac7e324637f1f9c2bd318bae7e9eb92e5e7cd +size 768356 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ff90c754f3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7b73b83afad5fadef8d529836a6f5e6d49f39d820cfdfb6c056c4de3384f847 +size 682460 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 8dcfbbfbc1..f08edacb11 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cab7675da413f612d7158d0e8722fe8abbcf55471de883327fdf7372bf5dd3bb -size 736448 +oid sha256:3bffc4c6f180867448b71dd09c3e1e53fe52d637f640956eda2ffd46905d7f5f +size 736546 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d90dd4c294..5c9701b12a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e5d7d5ca57e864ca3224988ec02468c0da098d2c8262a9fef3961c54515b89f4 -size 632150 +oid sha256:1bc0a1a8a21af7ccdf4822e416f4f8c05a0be356a7042faf130e1c20588c3343 +size 632248 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 8fa662f041..4f54ffbf10 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d2438a01e76c3ab32e0e04f5173654e6c90cd1ee31a2250d393d68823855450c -size 695548 +oid sha256:4fca976b7119a5365608edf9b2325b2044b570d79851af2863a1541c70331a51 +size 695648 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 60e418c3fe..e572cb9d5c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2680a31035b97e7bdbc6c1699d9adb42754d5f9df6e1c08b12ebc8d50df0977b -size 597711 +oid sha256:f9dfc00a881d937549252520026a8f79938ce4b19a64c19db8818a9a56e17fbc +size 597811 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f3871a13bb..8a80ea8ff9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15d66471eccc724bf3d7528041c217113ee6c4b3c4cdc2ec27d6e0912d49bd17 -size 731880 +oid sha256:4eafecb39358735a4be80c4627c4959da73b6fc69054854c1003657257fd2800 +size 732028 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5b8b786326..d76f974c3f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:75672cbcacf6192be82fdd988f9e6de72dfd4dc5eebf7ee2bacd40a99fdff836 -size 693102 +oid sha256:82013fcceb7a7ea99e5ae1c292c71dd6d6c92341d2f4d5ddc8fa438beb3e4c87 +size 693990 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 799ab90513..35dd5dc24d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:84f06dc64d4148bc219d114c367f2523cd68c9bcd1cdc08d1f11229ea42ff0f4 -size 715890 +oid sha256:fe0a98d68af0acebad97abee7d71ee292f281a1e763ee0b50e03698d7b780297 +size 715988 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index bd953160cf..fa3402ebda 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:552c2416378dd490694ebb08d97c782df5e0a473245d4df9bedafa178eadc297 -size 683624 +oid sha256:1e3d57bc1257790c6e6a8e8734bc36f74e96a74a439ec1a6dbe6e27c9ff3a745 +size 683722 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..28f466598f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:419bbba2e50c9d241d1b0f8350cd8504b08fc18e7f06535f384c953dd142a269 +size 754740 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1744cd1268 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e43cbd79abd4419a2849549599b827f071415fed2ba1c9d7a3e6d8e0dbb7c9 +size 668942 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 3f79683de6..a5e1b2d49f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dbebfea182f62864476ae160b741d762603ccb667e8eeb75fdfdcd670bfdf0b7 -size 722930 +oid sha256:b0d89d52db72612f883c7074148843bd0b7aab69c7b986e4b0eb0cbd0681817e +size 723030 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a035e50339..09415ecc54 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f2ad6ea4572a7ac81bc2fbae21b9a8e68185fc1954beeebe16fefc44ece9d04 -size 618682 +oid sha256:2208e48b0063b4c19fed83183c30b64768afba7189bb7a6161984bcace90d0e1 +size 618780 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 62b85d4377..8df5628eb1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:52251614ecd3ee7e0f1f5b290dff2d10aede30aa8eb35ee1b07144adab89f916 -size 682032 +oid sha256:b1489cc274bcf1de2f06eda97f8c69313ffc5b7cc75cac48b6c14a49799a9cbd +size 682130 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 864e24f30e..46a2bc2dd4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:57e5f8813cbb8e26e44a1421bec95050d4fe5f07384970e5574bf2025c4654c8 -size 584195 +oid sha256:a5a0c7dd525d17506e5ed1c0dabc0c66d21780a73b99369768c7e8be04e9794b +size 584293 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..471c237a2a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27d1a89b25d91996419a9809ac449afff670eaa0444df05e5bdd3f7e23927442 +size 606555 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4013ae563f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fffbe5deb70f9ace8f12c370bb575f2fc07bacf632e8886458c74f7122a40b6d +size 523815 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c772988341 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f383c271fbb061d0f880087186ea565fe0c242df4c0778f2c9df511f47e56b78 +size 631642 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..06d32141cf --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e48f20b7e2804e1699c70b0f66c9de3a5d23f959943e9b66b93a8b5852c6553 +size 546977 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..89aee8a31f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb0304f30fd028d6f58c99a479e056635505ee81001cf6bab82d16d7c1d6179b +size 604285 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..cf4ce2a14d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34948c24d66696c0885bb32ce78fb890502ed2fe04a9fa3b0fb258583cf2bd7e +size 524257 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..0c813a29c0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:268bd279369f9635d6b6723a937403fe96d97452cd66861207b32182caa0fb2a +size 629322 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..2812e53f54 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a3f76c7fb530b38ede1ca189eeb518b855c387e755a02f6b8428a5c22ea1e45 +size 546481 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a65bfd23c4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5480c05af77e21e14509cef6755555925963dda59bda8065bf73424e41ec298e +size 673238 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..063ec86d76 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b243f45c88bed8d26c215b67235556dd9ed6d9d35b399553ee50ed6705cf7126 +size 590003 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..dab3936f92 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3090a54f1bcb656c7211fb154019935e0815794716824eb4f0d028e9fc93758 +size 698076 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..f3528fcb17 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef0da86be2d251f0c995f1fa35dfe9169522bb86b267c3e4a8edddc0d8773f88 +size 615927 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..e53b1743ce --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f36a8c36bb26be6d349fae8cdcadf23d59efdf85bba3136080f63ed7d85ba50a +size 692892 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b979bb8fa3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4787028260ee2bb91755c0afd7e8df5e0a0756b994835e3e11bc93e894ea0df6 +size 604477 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..2d66e4c59c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93c346e017a74741465e8286c04fc606c66083dfb27814982e2843744aea9a4a +size 717534 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0c20ba35a6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4df54b68657a5a4b8886d5067e076f24404ff8c2bbcc07ac7e08fe2dc4c22a73 +size 626654 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..7c6f88e34a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc86f094a2fb7a573a97edd6225cc12abe888181010617b21a58983b3b473f91 +size 685738 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..07e1dde921 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1bca6b51c1b6119b95eacbdd3f4a73a56acfd9d0b69c43c81511ef25ee90ba9 +size 597325 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b540840c03 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9afcdee166f83168527d87b995023b6ffbfacbee0e423b02e6f62cbd8aba1585 +size 710380 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..af48787bff --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2741b6d7599c26401a4b4a5571369b18593b7ebabd05c959710d81f9a438cd2f +size 619550 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index b73427cd4d..1cdae7f5c9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef1531dc1283e95e838cffc388fca3e537def70c17aa9b6e10803fa5d6be49cf -size 688386 +oid sha256:42359db64b58039c068431bc98ac959ff5c8a211e4f8a46067a8777fac364a67 +size 689274 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 2c7d2fe9fe..04194a0cb1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6ffaf360f201aa0250536aaf74987ac0a01eae97e7982243fc2711fb89f40a53 -size 714458 +oid sha256:d2fad107d349a875da9e653b3a31c9fe41a5e172f2f4762d6b016f118d513c62 +size 714556 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 3fd4f20ba0..f7def150f6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:286325934f41075cce8aad0b4740bddb1ec0980223a99a152cb9ef26b0d1b4c5 -size 670028 +oid sha256:113d0bf6c6f8c056e498a244d7b4d77c07ba7bf7917a833a5aadc09d8ad83e1e +size 670126 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 4f936b42dc..c86ae0ef40 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db2a5682689cdf0d06a43d118d02f039dfe4e95a23b02e2ff86168f3c89df77d -size 696100 +oid sha256:829b63d498d46c1b41b86e8d8ca7f17029a39198a30f2f0a66d2bbf8390fed48 +size 696988 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 02cd0e23d8..f236eb4b05 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f0dfc303d30ce88062053049d95ae6167252f8c440979df2f649036ff22771e -size 664586 +oid sha256:6c7943ed8f194854bd29103352202616831f1bf21c8c7b746c17536ad8406481 +size 664686 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index c6729cd13a..2c8c4b91e4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:50f521d2c64c2b8140c9bbf5b40a5ed36f3db51c3f384ebb4643e3f25a1c69b0 -size 577061 +oid sha256:43f123f5f16414d2e56bffd05a8936f0a76a616777bf66e570708297ea9affe0 +size 577159 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 81c4bdcf79..746b6e3422 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f2408b453e8125ff88994f35e224048fe5a9a6d28552335c4faa9e8c4e2a80d -size 690856 +oid sha256:26d047c429a964dae48ca47257e756821bd54e19c612f3b1601bac40e2b91ba8 +size 690954 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index c0449c402f..8d64b87e0b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:280d2ff7d0a216842597b9255104c52e2e82607337f6c4793fdc162cd390f1d6 -size 602393 +oid sha256:14d8228378e28ac1b9004d170d1b4b8bc1d222a950e63f56251e377d02b6ac66 +size 603281 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 3ecc68d33e..364936a88c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:529405c72551e4bb00d4b86a556dadf658634d2e19953f6cbc6edb3248ca60f7 -size 681232 +oid sha256:7e5e02df33d8f7fb492dbe260847d9edc962eefba1ed44aba849f91aef400db8 +size 682120 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 954c4672dd..89f8410706 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:67117b847f854fb0b64d48eceb291dfe3a79f76692021d59855c3ca7e0e714c3 -size 707304 +oid sha256:133d4f63de6d7bc33497452c497aa00ce54fdece7a4ae818f728722b94b1995b +size 707402 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 324a26dfc4..dd858f4171 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1632e1cd65926d45f6f8be7bad5b19b514e84d0c2429da3007cb1a5b94f6c6b6 -size 662874 +oid sha256:8f633013c12a7049bc2f1c493dd444b6efe926bc42a5c26b54bc6416a590d4dc +size 662972 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 2a7f4d50d2..e44d223848 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6111e60d3e5fc69dff7ee2eaf0c71a031a9aa4c16c1392503a7559491b67e028 -size 688946 +oid sha256:8eeedd26a3b894c5077f5da1be1df5639a9ac44e8e71a95e4f4acb61e21448a2 +size 689834 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index d2fa24b38b..c6e0fda511 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4d9676e72e1fb869ce5f51463fb9facd74814dd9f7e3f5f05727131c4ef9fd23 -size 657434 +oid sha256:804f0c29b9acd33c7e1d782b9473e90e8a06e1b211a4ccf7bf4153caa4bca837 +size 657532 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 06d0c88de4..cbec5562de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8241737a82df7ac2791bf9e49b527eea44f7323c1d7b6bf7453b43128273044f -size 569957 +oid sha256:09460ad65e5d288012035e0774cf8d16084e86a5bf3d688f33ed89df2921d0e1 +size 570055 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 555ab1805f..7a2800fc42 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c6679c45d522691bd8d1b8c1da7745458132c53f8fae852ddb89544c4c8e093f -size 683752 +oid sha256:9084c4076d614f4e1cb15fe6ac49c595af2ba8f15c4f136d5562c5089cd5fcf1 +size 683850 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index eeecae9e25..e1b04f033d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3225bcb61de4d4ec13b71e9a9630ea1e3e98583698b851321adaeafcc2e090fe -size 595239 +oid sha256:d50524eb17364585e8c4b5c5288a5513397bb96f48aae43099a9a40d1b812969 +size 595337 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5ca3b71616..0270753f19 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c805bc1a0b244f4182479648cf2e796fb87f30239bd95cb6a61d1a365b8fc267 -size 608559 +oid sha256:054b77bead27d79508a7993c6a6c311739bfe7c3ca008c8a0a8914aa790f6848 +size 608657 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a8df7fd14e..082cb7bb20 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5671af0f750e1e4468d582ddbb441376e077a5b16b9581ef0552447f70190b59 -size 586061 +oid sha256:c64ce68d07db6f0789a9dedf26dc1f67c9fc23eb1bf27aa40a5d04a41ea6e062 +size 586159 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 46a43b91bd..e22c7506fc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:53060f14d0f1be5594931f33a1cc4854f35c63dae4ce9384fc6f8f94ca1586c0 -size 538973 +oid sha256:3bb36172a49097371c3b2dae7659e515b35b193ce1aa603e03a123048a70ae80 +size 539861 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index acbed8c8eb..88b909644f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bead1132161b68b5fa39793949068b3d0fae6622596994e5ed8c7566a6235e0d -size 518597 +oid sha256:a692d25058b18fd33aa42774a2fdef432001a67fd3eea0abec34dc2b84443a6b +size 518695 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5af83fba1b..5d8736fc3b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a6a7d26fb9d7a1c76ef3cf548db654d77f89d7d84283d6548ee75e7c52c2c0a5 -size 601795 +oid sha256:c01a682c1da9a59b83d711cd02cbd69c1e06b971770cbff532af3915089c4363 +size 601893 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index dd64a49e61..4494259227 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:367be46f254c1ac3fc76e63183534ac8460bc31e03cfde9ebca1abc8fa591a27 -size 579741 +oid sha256:74ae8ed1b85b37bfe52635fbecd20a3faf1d95fe48bceba3f5887bfcc933f188 +size 579839 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 31a85c763b..18b8e5c1e2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:953348143eb96ddeef6e123fb9b3c9b0aab45d0c203f12bd6c54f016c4a54f57 -size 530483 +oid sha256:b7a40fabf6a19472eb038f5ab8d489282e6b1d0b3ce384500161441c8af7d8e5 +size 530581 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index b91445673d..83eedd8a50 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:57920a4e8b35978ed66b6dd4be1bfbb34863a40c8cd51cac05917d13af2b00e3 -size 510697 +oid sha256:98fe81e7811c4d27c9c3ffe638f4e0165f28a07c738902f1ba1cd5283a64ac1b +size 510797 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c549488668 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dab51f683217dc59702d68b2124b8c8c97522d9c809e505a39796a977ba68e4d +size 687216 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..479bb84951 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee31a45d2602909f3a0d9228c77611adcaac37699bc58830517be43240152d76 +size 605265 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 4f221bf6a3..92105b16f2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:44228e5eb2a542be1acec345293addf2a4e922622e173fca43697acca682fce8 -size 604099 +oid sha256:b6400e17846d4ed3dd4b0d1f5d5c527bf152dfd2a5bdb562387a8a1b4a650092 +size 604197 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c13345e6db..260871394c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c31734e4dda8c7d396623b557fe96229a1d48b1162ff109f73cc82fda91ad01f -size 522149 +oid sha256:c300d3f7b627965608fa65afd41f92d4cb3b7c1ea87838727d757500fa5f308d +size 522247 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 51aac5f577..2a66aa0615 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cc2820a9fa13bcf2cbe153c752cce35f6abd8e57191354107854e1e8f236e0bf -size 579283 +oid sha256:654798c5b1989875ec1f0a8329cc2a455a946f1bc6de1b918304eb518d332113 +size 579381 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5dd993375c..d4607233aa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5973e79a031fc7e633ec25440d6ff2b59847da1551c143745fbbb69a5f0603c8 -size 496691 +oid sha256:43531f6c64be318cbcef0203cc44170c8b95c3133c5cc696b94c68cc94722f7f +size 497579 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..404a958288 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0771b8afb8dddc5dded2e1f586ae7255dfb74840d6830248a6515d5281b62591 +size 713042 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..f07c60c666 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d9ac38f7904f2d052c0eb6254fd126e9c4e84bc7da196da425ec71df718090 +size 626700 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 8d20a5f90e..ed91008a64 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1aa6ca210d5e298253112a07dd7a87b791eb821eb6031c60ade4e380efa1aff -size 558293 +oid sha256:5ec273e94e6eb4a867e417f51a9a61d2481e652299a74c044bcf0be4fa0e8936 +size 558391 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index af0381c8d3..32f0966434 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1df5f20102f7426cfe734bc165c9e8fabc9e4d6b8394cd7048c7c681e9d7662b -size 454339 +oid sha256:2fd392dc101cddc2c51c12b964c2f6b9b675340928498e9679263ad1a3cdb0c3 +size 454437 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 9f8e34a419..74663c4977 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be687990cb8f941284fe7f03013eca5d66d872c74a6229897de0dc1eb8cc26cf -size 534167 +oid sha256:3c1424f29e05e626133922209a2e33c6a69777d29c34f80e411afa4485667a5b +size 534265 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7731bd738c..abdf580b0d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:57a38e6fb5e9c6d34c9684446bd2aca0f44f158a07336f88d150232a0b41dad0 -size 433469 +oid sha256:4db6000fee5e0658278a391abfd32873cab056abea496b2162b99734c9a71500 +size 433567 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d16cad5fb5..5a8e0656dc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a0ba528e2b6826d1a6900216f38a4d1c71ac1a3a1a8bfd340e4645521fc4b99 -size 601405 +oid sha256:911746c623011fdaab4492866e540afab3f418c4a5a6a6b69eabbcf7ab587c67 +size 601505 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f3a6726088..813b69d969 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d3d4684cb44b29d2f0f3e86256763a2a631bdc1ee2ecd79ecff4a73232c512a7 -size 578907 +oid sha256:8bdd24809b67aabe8f005a71ce10bba33db716b1af9a5a9a32d92a55d79297cc +size 579007 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index e35e5bf5e2..21dd0dfa4f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:532b023a2e73e6dd1b513b095b05168073fd625bc7e53d94f4525f6bdc008578 -size 535817 +oid sha256:f170d49c32dc1902f6d69591348c9d8816d2b9087af7d126790406bce3acb22d +size 535915 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 82e11876e1..b7eed4824a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:af77aa8fba5d058f1526448356b117e3b0b356b5a2ea9ff70dd866893620597b -size 515439 +oid sha256:9a7aa1dd2bb36b2d165b0a0a7702f2941c58716ab5d5cddc807dc713f4a76cb6 +size 515539 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0e2ddfe33d..6754212846 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e7fe9dd439b703f8e48e662827fc3ec2cc0171ff2546af9ef3a4a66edb12f11 -size 593851 +oid sha256:1ddcfa905b9bc8f9eb493158fffb9981515972e5c28926b29f413124354f7c6e +size 594739 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d3a0e7addb..a2aa19ff8d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:13c3c302de4c0a8bd6af4485ead32ae172ef165fb6adf050f11a1f8c7bbbaa6c -size 572587 +oid sha256:c3528ee80bd8f4ae9f94f1f92f9ea6ad49e8282143b36c1f366a43b9ba17ab3f +size 572685 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 75e1895ebe..1cefb7337e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6baa110978ae56593cb49df136ef0fe8662427a2beb62f0d5ea01d22bd33f58d -size 526487 +oid sha256:95f1a27863b7ae5ccead282970fb60ab818d1584c4c8ebbb4f4ea13010960947 +size 526585 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4666a0a6ca..ef4f93015b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e43c9d7bfb882107867fbd162481f8995e0237f2ddac565b495bbb9f6ad3c17 -size 506701 +oid sha256:0345f7da554f95bb6c11af576bc978dda1937e4e5ab077ae053fce569d4b24da +size 506801 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..de00461d65 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17fc681b2a43919f25a1d54031e5884068da78111d3e596220e7456a4c308ad1 +size 679964 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..11fe2cadfc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:944ced6da2a3846902f3a759abb33af60e499add160d7207da76a722f2ac0857 +size 598111 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 8b3dcdecb6..19c56fbfbb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:65689dad29fb2c329ad5276e783723c23fd8e2bc02fdaf83c2490937be5a75e5 -size 596945 +oid sha256:a4227495f46d697bcf98c1534510549f728f4be3ca553d3e4dc993a0cc4db77c +size 597045 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3c932a81a8..57783bef2b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:44f96117a5024e4f1da385edd8726335980dbbd0b82bef4c2f3286c1998c5c57 -size 514205 +oid sha256:a80a170c52a97384213bf7cfdb943a88037c8f5aa0c578bcbd9a8f3df7dce962 +size 515093 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index dce06eb529..7809a6b40f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b29e6d74d8033737cbed58145fff00a3d1e5a59ccc093955dd3f7b74543d5a43 -size 572227 +oid sha256:13cf076dc339c4b5089389b517d34b77eb4cfb67dbf23c6e172de8ef8d30d523 +size 572327 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 11263dcd0b..2f75e9096d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:06001ba22f6c22dc426f7b2b89ec67c892c6c704708149f6c71b607786954c06 -size 489537 +oid sha256:f381aa7b7877585a491ebad3c4dd86468993158cac9e61adad6abf6aa8fb2037 +size 489685 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..75be7ab198 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e923c3d695c94c8f573507671cfe016e3535c0ab538046c76e25d14a806717a8 +size 705988 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..18fca52075 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb6cdad213be45287881a7cf3e3bd537d8e5bdee0da7a4f7f69ec1b394d38906 +size 619548 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index bf1cd308df..e1dd8c81ae 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3dc9076379c74bdd31707570c83acb17f1c44172dbf6b4a43280af848b2031c2 -size 555135 +oid sha256:155478b80ea91a90735ec24cf791e4fe3abc0ea094cb4ca939b2ade0ed8da092 +size 555233 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 16b95bb183..95b193639b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7615ce047dbe3ba9794312dbffddc67fd3f5ed4e8c696669f7c63618b871d7d3 -size 451181 +oid sha256:d29a01fe8e3e679f16f3036dcd6f1b4f1015969bbd94b0375052fb4927599494 +size 451281 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index da8290c202..69ed29dfc6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b37d2782a0d785142ee0d8a8987f06781a85a561fbd4ef34cc37ba9a3c24969 -size 531009 +oid sha256:7e4469f2fb487ff1809abf3871691bc070713f0c3389c0933547d24487ca2946 +size 531107 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6d7ef5d488..7153712a6a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:966e2f3457ad2f488082507db646800e3ddc7b5fcfe8adc11b103f0d54ac3fb4 -size 429523 +oid sha256:83d210736db7827eb349f5ff2afc369dc8f106482c997cf53f92b6c823991ba0 +size 429621 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 396ee77a47..b805f63c6f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:57ce9077b29d8e1acba50a68cb5e44a06cd45ec8feb5a059754170f36aecd5a1 -size 628820 +oid sha256:09ca449788098d53a1c456a91fc0fe2d5b7a796c75c21f5f1748bc715848c157 +size 628920 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f36e11d254..9f6008887a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1586a4843fce89b0aa23375fb506118a4ed65ad778e5508aaa6785e0276d46d -size 593397 +oid sha256:65d89aaa28a78b7d117884b59b36d48b2a2ded368df4b45951b85b8c23b4b42b +size 593495 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 80dd410d7f..23632c1b8e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:299259e480441f2dbf939d229a66de555b3e77a7f0a7ab95c6b4379a417ef316 -size 561257 +oid sha256:3e3bcfa493419fc5c2bb6c843a3ca63e726d1d9f6e803a86b0987c202a154bb3 +size 561355 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index b20fef52d8..1e03cb98b5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1006176516be02c2e0399d82ecddb4df30fe10263c9d6cd040d50de486804bd7 -size 539301 +oid sha256:6beb57f67862999e7c81f5b2d6c4dcea7630d06beef1b87c40f6dc95e6677692 +size 539401 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5dc4bbc765..24d3400e81 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0bc718f5519da5883170e0b8944adc0138bb85e164cd9ebc7f664f77015b258b -size 621266 +oid sha256:04406e1373081cdc47462c7fd9f03fe5d080311ca13a3b64b228a69f40247ba4 +size 621366 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a6dac6edc5..0f50473760 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1636cc435e1838b1fc284a937ab06079a6bf1c2260115c9a9edaec51f9de400 -size 597633 +oid sha256:946718910b7a2be73ff7fb500588d050c2c61e370d5b7149cc60c4e2da0265ef +size 597731 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 449f5b1ed3..71acde60b1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1682c1d3931fb57be2379b357e0319db40d53ee1cd9410cb08a9c38fa06f9c73 -size 551187 +oid sha256:dab26649d33ae9537fd87bded4b10a644bcbd511f5eec9473418a48dc0a23a55 +size 551285 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 22d60b784a..90e4931b01 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4407f925e9bda1bf5dcba2657949b94d5913c0f03973ad020e8fce02b8aa18dc -size 531205 +oid sha256:644c56ed267afb15fd36707ad65f84027f1887e461cc8ab2440c182af7bd6bfa +size 532093 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c262e1ef5d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb452fe496987a6393822e33b5e814b2e8ba8827765afae3f1888670a134e0f0 +size 761102 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..22a07c4246 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea3089c016dee551a4ff94a310ee097e043636aa2dded14109cff361af975215 +size 671604 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index b6c3402513..6aadfcae64 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:446559f415bd89a68fc3c842ca2c67a943e3b8c74d4308a2fed1522b1dc67b72 -size 631514 +oid sha256:33c150f02e60f5886e6a0caa4b3a96494150deba74e4abfefc869beee5a3ee2c +size 631612 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 75adb4f588..027e494b72 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:737a3963f2b2cc274cbc09c45ae32fb2e37612426526e401696ace2bfc5e0181 -size 533923 +oid sha256:c3ac647ca963a31f9fae7abd9fdfff7d25c955d38d95d044c26087fc31c6ba7d +size 534023 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 505158d0b1..db6e4823e2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5be816cddb6a3bd6fd94569e068f413a5de26675de3ebe72b91a0c978bfb9f4f -size 601171 +oid sha256:c5f69bcd0b409eb196e040fac6e69d81e87f7b3340ffc00ee6726cd95fe7d0ba +size 601269 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 95200d2ab4..47d11f0549 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e4e466ed406b6e41eaf6f0703c89be720c2f98d5590f525e82abbf871bc35e8f -size 508465 +oid sha256:c20c182cdb00b2cf7e93bb384b8f07cec82f24f288ac584649ffbeb5b286d4bc +size 508565 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6001682ba5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fac3c84c6ac604f8ff9b47198a7e994f264cd413fd1965f22d016cfa9443eff +size 787076 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..11411869a2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32d6d751d56e13eea6ed5f1b3360f78cbb6e98eb61d429b615d2387b060b9d4c +size 695506 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index c53a250ee3..64edc3782f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6a93f2755d60d908d1b14ebf69d4b1f44e0fb21b80d0321c74f6898365ae2ab0 -size 578603 +oid sha256:2f766c65a22047f570d7091d1057f99c4d7d67e4a9335877c2698a75b835b203 +size 578701 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index b99b907cb6..5d8ea765ab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:98831874e22596c33366771f0022e5d688f667598cdc5ad0c8914e7b9718c96b -size 474107 +oid sha256:a752edac927ef802eada5b857b6e2ece583fc9e68bff4aa73a357483c928c6ba +size 474205 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 5c14633e33..f0cb42698c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c2f837bbd8b7840465fda8a1d8efcb21aecb1b957059368b66e151b52ec485ce -size 553293 +oid sha256:434adc381c8e470823396949a8eb21bebd32185ecc81219450420e9b3d97e291 +size 553391 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1571c8b16c..9c645964a3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d71f6b9cf6654428a838e0816abff9fbd413be07244f58a009dba92a6cf556c3 -size 450869 +oid sha256:8eeaca9d84b2dab15993ca651dcac8ef5d8f957934ce47c50e1b9bd8118f4035 +size 450967 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f2ad426cc7..e8ef2280bf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fcd4a3af3dfc557c3888c2b32f67bf676f70560a6922a8f86ba4fe3101a3b98d -size 621716 +oid sha256:f1cff576d6582932498553d4f2cb8cd0a2e20f12de887cb9388a1c8d48a72328 +size 621816 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index cf0d3cbc7b..3695f85634 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a1ea731058c7b3ae053e4d9baea589fc5a4b1f7f8830294a1b9289ec367cf8cf -size 587033 +oid sha256:c290455f2863b2393502a71c33f98e1347f806de0280706f8d5a42395cb01442 +size 587131 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index d7268edd3f..62f391a851 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b8565d1dea021439f04fd29744c1aa6b417fc57e927f18a13b8791cf0a2a150d -size 558099 +oid sha256:e7467629b4f1b5133fc7a3b5e4aab67734b5445f3f420eec0fcae67f123b664a +size 558199 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index c7c19a34a7..ce2c30b6a2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e835362d525954db793bf8c515f9bc30d4c30ff15393b78983307dd4ef1d2d1f -size 536145 +oid sha256:856aaf63806c2998993d66a396db6e03a019028a8f6dd0edabea534f323ca368 +size 536243 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 11a630338c..ddcbc6e60c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d5819d79eb6b880150d67ed804311af5d3eadf3b2a9c128dca73a3f75221f693 -size 614113 +oid sha256:b4d03853d9860b12efe333729f042a6bfd6bb62804b6152372e752d04e9111c6 +size 614211 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 044ab78558..7a8e8791f7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e10c3cf847ac9a0fa703f5bbaa7089514be1ccb2f052be145d6f77a13bfe23fe -size 590479 +oid sha256:b394da4584723e66f04c794bb3d81f332486d6e76ed8d8ab03c0bcfd47a23631 +size 590579 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 53fe71ec13..23d94168f9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:00999ab5a98a52f607b5caa7a1b975d6b7fc727699d23dd5a939552551107d0f -size 547191 +oid sha256:32615cd5a9b7dfa9ae882e105a04a2c4e19d8775ad06dfc98175e19442e0ccc0 +size 548079 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index d4ea5d53d2..b98c0c871b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02d775b9d8f7183bf48c390011885d1726e991c17d7bfec459b6ec8352bcb254 -size 527999 +oid sha256:4f2494f4d233a243008e7c03e1cc7b92099b84c376865a66650ca1eb760a3fb1 +size 528097 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..3ea67c605d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b442415d7995a9c0834c9c4ccb466fa78e0ba9549f20a9951b0bb6df53585475 +size 754738 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..08b74bf529 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c49ae04b4c538c4ab62c6470af4959e5f81db8e00b774dac0faa0415dc976ca3 +size 664450 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index ab5f09c7f8..02c8f74bad 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2acef83328c135ceea20c075ff24d20c93d4c85ce1d3a18bb5b22d5431059cea -size 623572 +oid sha256:bca17d868d55e65dbe3b0d5312390939a764fc1dddd8c9b4343c5ce4c637fb92 +size 624460 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f9de7b234f..cbd5c4729f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:379cb88e26b2d68a72dc378722f3aa0df500f7e7f1b35c7f5c8ed712c4dbc886 -size 526771 +oid sha256:2a79030b38b6ee7ab91369014bcd28f33c49d34290865bdbeacb82c0f8cafaec +size 526869 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index c57094bbfb..89ba953ebf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fa26775a00f10f64e8157999ab29481d3aa9375b60f4325b4f0e42bdccfcb746 -size 594017 +oid sha256:a62aed4d29b5a74ff50530f11c4348be5c10b032b21a62af5a43b575d4fb03f4 +size 594117 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 185dc34ac0..68a3a4ae8d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1a8007dd0242e86d3b9bf714c255e96207a5d9c09cb67569a2865f0d86f8a069 -size 501313 +oid sha256:25911baca4baea375ae9a19689377331f1f2f611d81ba39218ff593ca4f014d8 +size 502201 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..fe5111d647 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aafbde82754c7fb0f7bd2b3849d6e8e299b5e26b941a6e8eb540083e1e690f1 +size 779972 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..092b9c9ec2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73b373aa8d5ec097ff570952f9680e67a91e4a6b3fdc6d489d038d9d1ec573ad +size 688402 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index ee146431e3..525377afa9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a729f6ab7f739543d9f134ce1212a80b83a034c78dfd07d626feffa8d3d1e349 -size 575445 +oid sha256:6337f07d443b5c7354133cc5404d23ade242feadcc198cbc72ad5ef02e4df3cb +size 575543 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3d542c2af6..fe00838d70 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b7074d0d79efd634900b215e368dad8d8df780daa0b8eccc08cd42ea670bdf99 -size 470159 +oid sha256:f5ae397019d4e4e117992ff44d3ba38e97967786e995bcf71329e031f6d57f99 +size 470259 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 3efd8cbf9b..b50e04f1bb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:913baa632929d97fb8bde8a594e4bea1af6cc45b3992e2e14a9151ada6314ace -size 549297 +oid sha256:20671804de58a809ebb9a820d956e04d251ba21c8ff1f847fb21012e822b6efc +size 549395 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5fbfe3f427..fa0b9a0114 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c805a71c2dcd79fc6e85f8df9ee60d32ee702003594ff8bc562e875f379caafc -size 446873 +oid sha256:495c80124fd5d1282857b99bda325b31ebeafcee96b77a51baaf2d70ba933070 +size 446971 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..7d9b89573a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8826f35a86340f683033df995b5bad4e49257686b30e2b97055880a5a274ef96 +size 641204 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..855d770741 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5029787d691148c287f47d696c2c4ec45418b7d551dbe411f159750be6c254e +size 555257 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..166f6badf0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b049e1cd5b001875df1ce7a5d4688c5d27d79cb3df14c7969a4cca28364babac +size 640414 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..f4da4d5d7a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8864d7168840486c43608e9c08ebbb2b0b64dc281cb69e41af77899356baafe +size 560533 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..12f864690f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f26a49bfb94957e11c30af90719fcdc6955b202131d5d437d5837aa69baa2400 +size 732128 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..52a60af4fa --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b18674957769945c5534f5f791da289f063ce9ca44d8709e90cff0a8944a00d +size 642678 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..28fc237173 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df3dc1517ff55647db8f200c879c67ecdc6e904e3fc0a4dad0cd11606912ca0c +size 721818 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b38f10c122 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56132ba0e671d4f3ec9518ace0d7647766b9d73f57d30bf87400d6193068e8d2 +size 632368 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..5d5ba52540 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2d7ef1a23e4503cbcffc069e032a9f27411dbe3c23c29971cedf61bb6194e0b +size 731928 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..3a7c658f0b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b350f618d34ef7fed7798e833fe363d52ffb3ffb0329464e77ec885d4bbdeb52 +size 655108 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b11ebaa7ae --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5692de1c5c45f4b526c48ae2c23c2464de75739a91a3be4a4a3814300c1761d5 +size 720384 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e37aaebeb2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a0a7e61d295e2ab34623ec3e6b4dd9afa9edb724f31fe2f582d371e7b8891e9 +size 641148 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ba24181a29 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dcfee0cd5dfb1639a14170c01e6fc66e1191ca5104e54e0539026e3432af9f0 +size 641998 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..6f1a8b6690 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e96ebf8637fdff18feb4b1b322bc74567e8c9cd73dd6c205f2a232fd4b0a0247 +size 556051 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..650cb3f5d8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f41f4386f453d0d74957bbb0a295547d7e839ef069b17656163f9c39ce193787 +size 640418 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..03ec1a612e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53ddee27eb13bead3c9e28637382b25681c0e5b9549d784b7167c389a64a38d7 +size 561327 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4ed0a129d4..1216aa1c22 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:562da4ebd41c5a3fa50803d3fac3b946b66b779c7acdcacab04a1bd8aac10ac6 -size 803612 +oid sha256:1deda029192ae1815c100a169c1c502f5bb4ba75c0e027f8f6cd9d0d1ce93c24 +size 803712 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 8f7e3b4632..7fb98c8a70 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cae3f62737ecf8ac4f7625c66e8d878f830b03314b819ad263ba9c1ef73eb418 -size 776330 +oid sha256:a7e15f67764f51317057dc78d8709b7ebf1823c5b6a928f006bb55458fb7eb52 +size 777218 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index bf3ecc1e80..da344834db 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c3c4a01890e7a375c2e89b658f08e901fdfdcf1a1bbe36fd3c0878228f7d9740 -size 707364 +oid sha256:958a74e8d392c5c2d1d214baed68d1f2f9956fb08ffc082d3c5f2d26aeea0ef6 +size 707462 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2150a5f3e0..68ce1800d9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:734895f609eac6900a26c8cc5234044e0219089dbb081d7b3beee07ed037aadc -size 757188 +oid sha256:e27aad97f66dbae4144cc6d5a863292a2618ff986fc2d81187bf61111226567f +size 757286 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 18430d28cc..9f880f0f70 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4e5c972608a180a7c5b57eca3bce5dbf86d62b01ca4435896e89acb22e34af05 -size 733260 +oid sha256:cb9d7cb4c06ce30c02ccd29e16e6b45e6ef3fdbdb96919ae934b097a926ff2bf +size 734148 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 5363984413..1444f13253 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c3ce1cf880730a5378182b9534f2a3b94b6ab4e8b6b890539ff0d5510bac7f11 -size 604215 +oid sha256:9a965ed86b37001105435ed6918b2e2514605d5d700473c539c2bcdf4e7e5fb0 +size 604313 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e42b5cfc27..f69672dc13 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f62ec3f52e95f267cc97c175a971728b7ba6329370fabd9726cef679b322ca24 -size 791914 +oid sha256:d109f219bb52cbc8c968b7914ddf10a9d0f909482e8f00594db959b892edc8c9 +size 792014 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index c1f22ef905..c1c772769a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6b41cf7a67702e11fdc11436bce729d5c2e1e72f23e2fe8e54cbd833413d4696 -size 766260 +oid sha256:f709d07b913c084d996a0efd0e4544fbd1da2ad45f7b2fcdb44ed4d815b15805 +size 766358 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 768e4e003c..8bcfd1a708 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a1e3ec4a6886a50e9b4c1c8ae46dcd2628296dd9d0bac95b7677f92a83ffe216 -size 752052 +oid sha256:0b518e096dcae4e4b3f7e8799345c0f4aa712d0d6ef0fbb2bb1ecf8600d25aaa +size 752150 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 9c48dad034..4845f5e511 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09f9b723b3fb6ae3d48e67b9ac240f7c872402eca4750e13560f41a324dd0228 -size 728122 +oid sha256:654a6b97a991bd43c2b639062c509ddb5ed5ee2acba1ecb80f5864af8cba1242 +size 728222 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 4e89526f93..2c9529cf89 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e796b14916fb9e8fba1ccad7c850de5f7308e16154fb2d2f095ed70638aa4deb -size 800780 +oid sha256:7fcb413c167b4837d7adfa54423197e25435eae632f8cb044380559817cab8bf +size 801668 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c4735babfb..dec147c93d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:163746e9c1e9bed5733016ffa7b0b32c442987d0985a2323ba064ecf97932701 -size 709210 +oid sha256:4910cf1d25a39531ca141f2855300be84285e7ff81986eb9a2af57a874f746a4 +size 709308 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index 8abcea8d1b..bbe54c4189 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:431e8f3c21e0952e497eb1c314bc54408abb90fdfe6ac2a51a96ca24b27c020a -size 779664 +oid sha256:de298073abfaef222b68a796ed2eea1ead2a673683b45bce0f99b4ebffaa7f82 +size 779762 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index c5265b1db7..6b7b78aa73 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cefe9cb2181a413700124aad38059f470014ce79e27ea36eb2c557a7a5606fb4 -size 682074 +oid sha256:41b67f91fdc3181837e5b93e36effc29a71f316374d4d0f26d378af74570e280 +size 682174 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index 3b80addb27..f0c126b885 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6858fdf464fdb901cdf0538202a3f16e9b15b5fc9180e2cb6f62f54163e636f8 -size 651252 +oid sha256:52977043f99b5db1602bb6bc1844b1ca2bedb1f350d5c9659283e717e87874f9 +size 652140 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 565340e50a..66bd36b5cc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b6eb82b6a0a7f9c6c496152ea288fd01a190039c91a680bc14c48b435c347fe -size 568215 +oid sha256:87656eea83c96dff4d4fd6cc81bd4864032bb06a06a614c6f44c9ed8f30632c5 +size 569103 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index b536feb75d..26fc8e17e0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2484d2a5273fc5bc4f1875b7be8e474c18dbe93bd53be83ec8c0aa949e5d6cf1 -size 764914 +oid sha256:600f19e77d9bae5c8537e43939c6c782adf20aee6723939a1f09771646f5af58 +size 765012 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c6cedcfc91..1184362b53 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab8cda9fea71e0a8b065dc1894517bd976e400af62f0d1a67e0c45d245d05319 -size 671912 +oid sha256:945ae3aea063cbebb8644d560c508d74cd230da0dcda0e6142535924c6b823f1 +size 672800 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index d3ee41d51f..27b0ec3a8b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7edc1c0fd0773a0ef244f3504f40147b7392d50173f51bb4e7fa7cf7076913fb -size 745424 +oid sha256:c4fe204c89b890e9131336715b21adefae9e523f4f903e9666fbb58e288f1259 +size 745524 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index aea03f4115..9917660f07 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7ee46d911a7f755c41c5d054c35bbfc9a2cae62ebeb9b328cae347934838b9c5 -size 648132 +oid sha256:25905640e9847dd9f9d8d7c4181f6f601450eafacaadccc1214fec2f682c8b2f +size 648230 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e1dcffc646..9e71ace23d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:92bcd703564ac5e29352d561d6282ba32cba450f48e658ad4c430a6804206a91 -size 782942 +oid sha256:b46a0856f83274d90839fef590728fe5991a7573759d540494fb0c08124b96c5 +size 783040 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 2455432d03..1f5175e458 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c104158f14c79381ed3ef326c294c17d2de371aad4f3a548edd48b6b70e7268 -size 766018 +oid sha256:52ba48e9c8f5f562e2642eb51c26dc9cc3dc7eef4c34f450a7087e64f3231ece +size 766118 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index b5a4dd1683..29ba6e1901 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:64b21f727dfa8c751d51c4d38d31a2304f6af18bb3918ac0fb24a28a6c50c7a5 -size 686742 +oid sha256:a757705715936c49651c2be1844ae96b3afc342680677290561cb1e4f93969b8 +size 686840 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8be6fbab51..5274b0aa3d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ada7e801a2f366bb3ca3d14d3cfb684cb6397f7baeb036c45dadb00af9e42bef -size 736566 +oid sha256:ee13ddd31e8caa4f51615c327e8ae9ddb8b28aae1ee09c92367c5a7d2f1f7c96 +size 736666 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index ec9ee9d4f8..f0b8fff787 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6ac8bf5ab24ac53d490db000a0579962d2273d652ab4bf999e8d8bc413633aaf -size 722948 +oid sha256:695d4e6463a2051b25a99e442584c90a06ae9181dfe63101026509c5af983467 +size 723048 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 5b92f87ec5..cd7cba3183 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:681bf1476dc835686eb1633bac236598f6114252841e9303dce5d9fdbee0f7d4 -size 583593 +oid sha256:1c87df0b9b941d376b30b68945cee35dca9026751eaf3bb3cf783ddf9cf87d98 +size 583691 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 379f51bf83..bf17ba201d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3049f7ed513d776828ad0a1070a90024ee90c6263046b7b8c7316eb929099595 -size 771294 +oid sha256:63805b6289df1c517dbec278e60eaefbd970962bc0d5314a9c309790020694ba +size 771392 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 586f56e821..9a8c4704e0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:88d247afc64d3e9c863a08f25a2c7b7183fa7f68df67f4027cf338d4d8ce45b0 -size 755160 +oid sha256:2ff463335a7b97f9a192c126cc12661282f92cb3e044627aa1acaffa7115c769 +size 756048 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4ea1f65c37..72507d24ff 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2b1f2ec80cbe0c930a9832f55f8fe7ed0b93e169927b41692fdd7ebf7c47dff3 -size 731380 +oid sha256:e46291fc7caf535a2efaf3bff792f22cb134ca3a4df26b1ca943b11d1f524ba7 +size 731480 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 8cf611623a..1150dfb7de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef1ab769a50c3a20d815ca37aff3806bd8a8e861066729271eda55579a6b33c8 -size 717022 +oid sha256:a1f0faa1448376e678c4a045c22d599cf20fb951ea7fd6226b27f3229f2fce32 +size 717122 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 16f2bdd3ad..e183745a61 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b16a1be8b6b12ff57b7e4174d2c7a4f140bd648e4c94e93ce6a18da872da75e4 -size 780948 +oid sha256:4a6c95a39276b85b4caa7611e72d206b18f21d756b088123a42eb7083c5dfdc5 +size 781048 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index cb18b0d545..b9287a4cb7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0559b6dcb20c38d03252c5668dc56c7d9cdd996ff452db5a2a0fd693d18ef573 -size 688540 +oid sha256:88b34889cafa29a591e8ed1edb7e5dd51d58ba6cfb7400c389f47108ffc1c25f +size 688638 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index e2e9995060..cd438cadca 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:93ad37fd2547007e46974b9093d3e9f67cbc95e669c5e84e80137885a12b6aa8 -size 769452 +oid sha256:18ab3ceb9be350168aef98ac65557720a718d68c94e033b31aecb14b95e6fa0d +size 769550 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 7392823084..79d4a88661 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7c91b10ef1f81937a27512ae713a76e047b81ab4173e6a8c344dde2deade815c -size 671764 +oid sha256:13a8a55a6d1346154077921729c50b95f6f1b65f4ef021173b2e472888fee9c1 +size 671862 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index 8d01f03c06..402a576ba3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:403639537abdf3e314a2866d3b0df75ea567f2b70ff5dc535b18906a6036b1df -size 631370 +oid sha256:ca703366c54e84c9a348551a484741a1230aa57035bfe8c2dc78b7804a563d15 +size 632258 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index e6d7ef214e..42e9091514 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:56a204252d3b007a1151a8087f176dfb5ba3880b4d5719213d9ef706787a806f -size 548333 +oid sha256:101b5ca514b324b34d06fae13330de93c25ad51a479b704e436764ce3eb70aaf +size 548431 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index b33112b5c6..7f89a51d03 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3d172475ffeee228df8cbcc5b359ff367d7cd4ad72e6f12965416f1ede256b0e -size 745032 +oid sha256:1a2557cc302a2eb9dbd4c233723a621e83620248bab7bdf1fea63caf53aa930d +size 745130 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index dfa116ebdc..a3b8b2bb1c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0384cfd9e5260257b202120d855258685424ade774d7a5c1118174002f41e56 -size 651290 +oid sha256:01289d22e1f13876865209f168c8aa0c85e03434e1fb91d9be2fc1968db91c3b +size 652178 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index 6d8c31b715..ce6b4d9421 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f7ce4516b6b9a113d21b38b0f981b600f5f805335c37b1a1eebb838d8b23b506 -size 735212 +oid sha256:1a4005da4f2d2abcce025f773fae7444b98350417371e3697806913bbb0055cc +size 735312 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 570c1488ef..df189d160c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c6171722e01f1786a21dc1395df33ba14894c991c1cee9ac326828c069e0daf3 -size 637820 +oid sha256:c5a3924a227fa087040cc51141af19c79781c11010a796b9315f2327b528efdd +size 637920 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e755fb9c4d..d66e3ae0de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:effa0e658a51f874a17f754386116ef6bf217130cc7ef42848b0a805abdae6f2 -size 709288 +oid sha256:8bc497a84ee80aed9773cd35622bf963ed082064d25818ac55d427282ef59c1b +size 709386 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index aa4c571cef..37c4df34f2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c286d2a6dd697cfda4fce13be73d656c39feddfbb25001c26ded1f37a61755d3 -size 669078 +oid sha256:46114ed4ea9e22d0919a83ce0affdc8e45ed33124c1f34c86a1b42767b99b162 +size 669178 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d584c6d54a..d2eb6f4eba 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83025b569297ee5c3f6c267a36bc1ef20c38e95c821e450b66aa11b582fafae6 -size 696208 +oid sha256:4b887be42f1842b0ad5cda99a80a9452e7b6cd3a43f3e103d79c10305ee679c9 +size 697096 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8e187b235e..337282706e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a5bc167df505e13fe655a4c6a6b8388c27e3af3470dd726793213cb5e0efdb15 -size 667494 +oid sha256:87ade9a11cb541181077140e67949d500a564a194e4dd8f88f1ceaae5d2265c6 +size 667592 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index b75af01116..278cc39fb0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6a8d2b354142135ad718440326c9b6ca7c7a166eea0fe579f4b4cb36ec768670 -size 707344 +oid sha256:f56edd28daed1ad3199c67c8c78b12a389be9b24eb1854d4a64efc6dbaa5cf68 +size 707442 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8382e9099e..5b7602c5b4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed3260c245effa5385504ba1a81a76c41009811e8abded64c685dd796bc51775 -size 613847 +oid sha256:6dc59c983c18175c6bb4cd1fbbd33d9a0dc60d2803b1315d73ba63200e61a5dd +size 613947 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 01d53a2400..7ea3eb8798 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d9d4e75b9ec47766515e6fecf3a7c82a6e29e3d9c620a834243d18bfa7dbcc0 -size 678580 +oid sha256:2e8d544eb8c75e9dfe82286a07079c5a6898e186190e382e0585119cf7561163 +size 679468 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a3449cabd7..8186ce4f87 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1418ca2fb3623060cdbdfc7e209f7c4ba879dd26198a6e76eecd90bff499e15b -size 589821 +oid sha256:12d9b301130d4b3f6a345b6d6f6cabbf5da318ce3a48afa10db991af431994ed +size 589919 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f2c2b9be6d..0c08bc5cce 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f9a083763bab71dfb0e5c30f7a09e5580221880a069d8dbfdb502654c0e7239 -size 857238 +oid sha256:b34ed1a4daead1bdd3e955c0dfa54100fdcc25bdc27cf377eb3b0ae978e20e80 +size 857336 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index f80a4cc1f0..da3b4a2b37 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d4a571163530cda7d827000c93316e3fe84a2eb7e95035e90add2902354619f0 -size 822802 +oid sha256:f96dd4acaa5ee4e8eb59ab70fc01a07e870f6ad3c07a290ecf4ab4782d77f318 +size 822900 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index a3aa789374..61b5d6d600 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:da6109f5d195bca9c37bec21eeda60720d91d8b3b9ccec7f0b162304cb9e7694 -size 751968 +oid sha256:68543fd18ef59d1378902ac7a9eee76a2bf988d1a152eb596189ca6eeb6a18ee +size 752068 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 3039e8cbeb..49dc23371e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d25251c83ce43ede1752de4d05b9d406c85e7a45cd9a5a490cae0625c9b194cd -size 765330 +oid sha256:5683192c79393d7e0844dd0290e88b88254b6da1e2e6feede30000c95303113a +size 766218 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b24be74897..43cd82f1a5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3addfffa559dc5cf8a0f8eda8634ae571a2c46dea06d21804127161f501ec8be -size 817178 +oid sha256:fc2e3f500516afd7ebc004f2ebbb4441f99ea1adadcf75af4a7a6bfeb8be514c +size 817276 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 4a1242ecc9..7492b3dad0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d08f82b2094db6d4038d4a580120374dafe7d2fe1ebf81b325cf79fe74ae4e2d -size 783728 +oid sha256:f8b384ccb144cdeb82d7598014393e084049a86fc48ac2eeac31ecf95072e032 +size 783826 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 9dc8944885..edde87adad 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:74c490b6aaeeac95c78a8f2272b4dec1d592aad6b6680dafd55ece94c8ac126c -size 615421 +oid sha256:92a11476e5e8a4eb5bc136d7307dba43c97c0918cacd1efe44a1e907ce6ad323 +size 615519 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 14e43bd800..b00a5479d9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e3380755f4b8514b95ad976682af89d23665a8b77eeee890930b2d0b0dac8753 -size 647924 +oid sha256:21ef808d9f101dc7d13c42e4648a29ec274edf68579f4bd43c3c947b4ba3e184 +size 648024 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 179792afcd..68fe406142 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c128be15ab65f1877a2a96c49ee793b6aa886b51f9943de72b814f40e6e2948 -size 842234 +oid sha256:1f6a190c87b93d29409ec01b37ae7ef9f1c1ae23657ffba9ff2096c3e40e0ab5 +size 843122 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 82ce2b88d4..b280b21879 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3f0b126e8cfcc7c6e56393fe67d4b435a61ad7dc9ff33f8fa711cfccb2c99b1c -size 807058 +oid sha256:8386cab186aedec2030ad09d8dc22bc89ccbfe58550afbe2d1e84d710cb27a71 +size 807156 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 322836b3d4..97a13d0ccc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5c4daf9140a5703c50cd23c8dd99a0ea6ba6b4c9265617640a026ade87793b6d -size 807156 +oid sha256:bb8495f24211d15cf1b90e3e740e1926d1390fdf82980adbea5c69cd50b55539 +size 808044 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 72cd9c8db4..9a61381b9f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83fc04b0d317b3ea6820cc1d362e58687c07b7e469aecbced0cdda8e0c12dfb0 -size 774200 +oid sha256:d5fa9dae1bf7174c23d7d1363699bad4a082e57d0d22df293aefea891ff7de7b +size 774298 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index bcbdbca191..5f9bd91834 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:827a314ddfc44b2bab837c3ef981d49272c9fcac6db22e819998edca05bc2c09 -size 855590 +oid sha256:3cc7189f0604c1b7e2b1234e7cf81f873862db47b1fc4fce6f639614ada1d282 +size 855688 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 23f6ee2d6d..3cc3bf5a27 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8acbf0f8d212af4af781a96832a98b7293618b7f3b42120f61f61f628594c604 -size 758050 +oid sha256:4fac29112550adc3bba7df6e9a1e4285103a8113fdbddb127d625800bd453839 +size 758148 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index d17cd1f1ab..e43e3d5b8d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d34ef0949a9ac56474f2a546072552bf8084463d4fa727671d15148e3407575d -size 824854 +oid sha256:d756448def5c265fc08d16be81924dc715aa483701c089b54f76c3db449422d4 +size 824952 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 93f9a629c3..c3cf67bf12 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:11fe278381fd0e6527b6c688a5591004c34d9ba12938f237067d58f19ba335dc -size 723070 +oid sha256:7812d98eb84ba436c4494fd5f39afe31fdfe25e822086da22dce3cd50c742ee6 +size 723170 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp index edf745d6ce..f9c2add1d6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c9ef99fcb2922ee2393d9c859d24bf2d137a5424a27c6b09b180ac73da8e3c8a -size 667440 +oid sha256:45be6126b5c4ae8c6b7c1c0ccb304afc7ce1eacef90c136a784bf64e6cbf1ddb +size 667540 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index 6269257388..94a36e6871 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:55126bcc6246d6edb9344397fa23d674e3e8c54389a2fca603d4e4ee445ff524 -size 708724 +oid sha256:0c903d3e3358ba641dd2480c7278756d4e310bb9292be198de96d0e20701b487 +size 708824 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index d44f533e25..1380c118a7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:295a6edf545a9d52459674d5131014e7f50f57c6ef475e7cdd72a19794aa8134 -size 576855 +oid sha256:a88649fd06424fdd1cb191840ebb641782e1fcd1768145d85b2e0d11453a3f80 +size 577743 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index a3378c209e..a38fb0bc9b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad98b3ee87780af5919ff40fd138f75ef191e105a8f7414f1fe320c13e38172a -size 612713 +oid sha256:9ec251343cabf6fce1c652b6d2edc0f77827ed6d7271ae613f12d9d8592b1b2f +size 612861 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 3dc9285774..bc07cdb90a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df35eaf07a02f024e5d159c833877118444811a8dadbd3a9a552311b41523546 -size 812322 +oid sha256:cfb7e731d48462928bacb4dee4bc489f0f039fc9af40ac586e9aa2031c627307 +size 812422 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6baef9ee84..cd730a763f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f25f899ca68a45036c5e492cbf8ac5ad9d8970767cadc1bca28a7d30adf49139 -size 720258 +oid sha256:1bc3ffff5438cf201adefbecd061b9a085b1abebc19392e7ba2cbfd5fbd4b8bc +size 720358 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index 56bb3a3f39..689f30b5ad 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fd7324e5ea8608cc46fc9f636d67b2c6b49447ebc2c590927447c359d371f522 -size 784102 +oid sha256:ba0b1656eafd2e602f23d1649dcd66f09577aa17b6259f2dec0ade42b78ac7c0 +size 784200 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 7f055e2612..51bce5d010 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2439282333ba3bb367492e5bfe50e250cac2711642f137d0d95e36692a6e8ae3 -size 687006 +oid sha256:80de674410d5941bbca40376cceb7807fde346c2f52593fabd667c200052c65f +size 687104 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b6b937e290..c08ef00bf6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e5ffb64e482c8659bc9ab308b94b7cd99198ebaa430e25e807733e19cc3c3aa9 -size 833410 +oid sha256:a9b2b4f42c14076f5a8a4632f85de69d3cd4cde252b6468de4c67da27b4d3ec1 +size 833508 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 129324b5f7..1a045be38a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c74eb66936b2d5252128a88dfe12dc743ec1a4c7bb2ce604591f07307aef0cc8 -size 809728 +oid sha256:8f78870e76b91bcc7cfce3066ef7288ca4a724e2e2b46324f5925beebaa851b1 +size 810616 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index abaf3e411a..2194ba52b7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e24acfce317f0456f3748c6f20537d21faf49123bdad1eb12176e1910732d11b -size 734406 +oid sha256:997d5f2318ed072f488941bcfbf6f78f6e4de0f3505d0c3fc1bc3c90deabd134 +size 734504 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index b7871711c5..5ab96e1643 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:71acadf8dc1d1310e4a2221f61ec72ddf52810b9f280764fd87c2b246864184f -size 742292 +oid sha256:9dff0bcd698eff272446485a648664ef36ba446705129022e01e08913e571361 +size 742390 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 60b7a7e226..a37c77673e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae525a7f785732cfd23e6abb3384f51d41ca0efd94d290898c6f0f42c788b52c -size 793350 +oid sha256:ed5a67230d5271764a3420760ee1b54983ba3aa37dc78f9d8bc8aaf712a942f9 +size 793448 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 579e3a22fe..be0cd3cb1b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd3389bef4c42ffaa9304dc7579685324d4f6c0551a2501a2ea17e9017969ba4 -size 771444 +oid sha256:51a1ec74b007993942d7018d71627f478897c96f0cb847758ab589ef8425aef4 +size 771542 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 559b322efa..eb5e08d0b6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:089fb656003bc438ad349b625ccdf1c59af58d8db5fedb33a57bf2db6887c802 -size 596921 +oid sha256:a9e327cc7e7a3f68e4c2bb7b8a7b12fe3c9078eda94a423b645634487998b5ab +size 597019 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index f62c223877..c97f6ea9c3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:567f554fcc42162322686edfd5d50e42ad6096d59d9e9ea62ece9a9539bd162d -size 624936 +oid sha256:b5237dd37607e3f345d4b831213f4ddb0f0cd92b673ad9206d01429e18784117 +size 625034 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9223e915a9..00a83f9b48 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b56d64f0edff702c9e5ab5e4eecfab2698a37d1bb858dbc3203391db383568c0 -size 818406 +oid sha256:de2a472785fdb3e278041e83c5416ebc7936836f037d0908344fe043c686d394 +size 818506 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index b2fe779d0e..4045ea9bef 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b8145aaad79fad885ba1e19d6fb8d96175e9f700fc8153a3e577f4e5e355c27f -size 793984 +oid sha256:4043bfd37b51febaf1ca236be4745fe5edc62f64bc901944522f449106a59478 +size 794084 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ad7a9644a2..f6e12d6baa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cbc9ae0c10452b490a5f34b2c107a974e17abadc95790abdb4072d3351c7b812 -size 783328 +oid sha256:c94ca982997961a750d852f2a3ef9a8b8d5237c6c2d6e77dcab6647ea5611cd6 +size 783428 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 995ab8ee85..bd0c32e10b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3b695aeec0ad6dcdc069ea5bf6afce9104f94dff5ed7ad31208158129eee35c -size 761126 +oid sha256:8ccb19539d549346eb5dd4916c84b6b1f83c33a3f76f89c4a54c2dee694eb8ce +size 762014 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index d7bd6e00a8..6dd9f60822 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9079558bcf1e252a81dcf706567f9df511f8b22982beb715001e7a5ad157cb20 -size 831762 +oid sha256:67428584c518366b7b47e63872c1b8eae9752973fabfbde48cb65100a3cf90a7 +size 832650 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ecaec0c86d..001fbe17ce 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b56f290ba08e357492c3276ea68d5f63203c0c723354432bf27413d5345b0932 -size 734222 +oid sha256:6670a20df16aec20d4a4e4b8e8285895c9cc9715454a5cc28faf6fead5f8584c +size 734320 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index db00893ee5..e719d8c465 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:428dbbc0bb18f0a99ab0250a550a57cf4af18fe47f64a5a611da980cf4aed6bc -size 813458 +oid sha256:f926ae348f054921855b517826d3c6743f103223e8b81fde59d6245b6e82e5a4 +size 813556 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 7618ac8640..58db9da761 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3e83df5aa970a9d3aeceae64bde4ec822296226eb712ba98ab4300b6a316f580 -size 710786 +oid sha256:493ef314483897b0eb6b64427ed5bfd0011aca3c66991bb1388f64d8576126a6 +size 710886 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp index b1f736758e..c95d6e32ec 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e2ad61187a2880d9ce586828df3208213d8f63b9699c7e4a1e7bc432c341728b -size 648348 +oid sha256:d1209a55fbad749c9e96e5b50b5bd2bd87c1818022050828a82d8f0ceba41a4c +size 648448 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index e34f225e71..72942cfe4e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:44892d56cb3b37dc724a6ea2db01912326043042ab6e39d0382fa2a49c682772 -size 685784 +oid sha256:eee049b61291ea577240d63d47a8c4a5945224407317700b50e296bacb46fc5c +size 685884 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index fd9826351f..04eec2039d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aaced7767afdeedf7d41710d17d6cecf0648d54216a87155b47b155f936fd5bd -size 559145 +oid sha256:ef1e82b7fc1a0bc294465bd460c916aa276967b2c26a2ccb53592e1f05a64a16 +size 559243 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 26e659596a..5b33cd926e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a2cb5bc96c8b612dc84793096a65fcb1eb7ddb96dc0cd7c9e1f778b014579eb -size 588935 +oid sha256:45757af2b409984fbcbdba8cc248f34b47a158441ba8c1988379b2eb83f5e9d2 +size 589033 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index d5cd6225b6..6a588d54cd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8afb3dc87df0e14e2628b7d4f5ecb47a172c0f4c8dcbc1378f24ab675f8a3a40 -size 789284 +oid sha256:53ebbcf844408e400743fe4ccc3073505913b8481404b20127e5787ac2161ea2 +size 789382 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 014a657f8d..5c02e4bc76 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ce4ec45b257748cae841d496a38b258c9e4d567467be0f70e9b37dbfa348c825 -size 696430 +oid sha256:e7945ad30699281a01c536e1dcb4b2c67438234d9a3c98df283a1911e170a6d4 +size 696530 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index f38c24ff35..ae337aa55c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a7768c0b78f2a965964fccf1ab320b672b0ea8bf3d9c9d89252d081dd538bec9 -size 771818 +oid sha256:aea5c4eb53dd05990d169e35082ccb006c44fc53dba9519a4988e57c1dd49641 +size 772706 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index ad3f01f854..d93e36ec0b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:531cde631cdbd9adca8a200cb1d1bc94402ce85a1fa1454bd548ac8fd64d1732 -size 674722 +oid sha256:24d5ed338840a2e7d03254484dbfcc12d0731a14a7be2cd34da6a7f516cd4991 +size 674820 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 52b0d994a0..e43c17c05c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9a1f97847eff265ba7332403d2c78fe49d184d29e897ef691bf9d3db1c98cf0f -size 750678 +oid sha256:6f618b10b2218eae64234e1266d17d75f8ee21caa9c7cbc3e7995927f4e7b981 +size 750776 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index fd50f89f81..f3cc6bbc0d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f7462e548157f253315f5e38d24f36be973b9d239bcc4c7d56bd1e16a2dede5 -size 813882 +oid sha256:0966b89254a0465fb315c853a669f102162c8ec930f4eab171ccb814b255af7d +size 814770 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c913e49836..bd1dc1b0e4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4cb07167a2efddde61f8c5a71401380b0cbfdf58f98ce4e185ac97aaece54710 -size 720040 +oid sha256:191787f7bfc0c57c154739a2aca15ed290f9357cd771a55eeeeec96f0f81725a +size 720928 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 873edeb5dd..e33def6b5e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:57f19b8acdcccc2d33043b8bca7bf9c74c808cb0ab3a5652bc7e0bc5fb07547b -size 680444 +oid sha256:ec591414fdb5df54ee783f604f69f32dc189682b0de033ffccbbbd6fd9f26ae4 +size 680542 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 32d454f3a8..01bb557156 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d5d9b99631d53c0d6dec053ec052a5b2e6ffca2c62e009c32c8fbc16449f340 -size 733504 +oid sha256:14b4fc6fd0c7294a7b9b34148a759ab032b01e4d0102d51029ee61dd1c7aa5f4 +size 733602 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2280f7f633..cde5c3374b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db98c0a8d3a86f64ce30beff0e0c15e8981622e98b974ceef7c3ce96ce5bd0cf -size 709872 +oid sha256:6895e7e3b1f6a159c722137eb8a9ef340e9df6877de98593c5457fec9a4167f5 +size 709970 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index a4cf3e7806..beca6b39a5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cee722e9c958dc991b5f94525d38d4b78f619264256722138e5bd1247fadd9e8 -size 745824 +oid sha256:4fdaddd27b60f15a879f7c1740b9fdf27cf1e636f0eb997ee0512539718cd870 +size 745922 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 42583c5014..30b1f65e88 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1663cb60034dfbe9b9f5d3657ff50b67912f1fe43216149da2f3bfc28eabae5 -size 654056 +oid sha256:d453261d21902274bd82cbc193cac6ee01b5fe0107a0399de9045842fefacf9a +size 654154 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp index dd0f7d35ee..2bab4d9956 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aca69ba8032a594ec0cb884d7b606d764a1ebfb7f5d90e693185ba9e9aba2074 -size 756734 +oid sha256:f36dac504235eb64dee02a182d12976b6a7dde2bf63f0d1a9e1f6d08a8db4a65 +size 756832 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 1a91bc8cce..e79364a138 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5ce8374af2c1344a591be1c6ad83cdd7796f32138eec00b7ddf80c5a451bea08 -size 638770 +oid sha256:8b91b692f061b0815bc257dd75c9584d27ca843d935e24260e625df4bd8bfb05 +size 638918 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 63f8775edd..95640ac7d0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:af1663d50201bb5d33427ecafce8d7a29fb9afd1ec6e4efd22078c08dcb3028d -size 716074 +oid sha256:9fd312a089d10c02eb91f5323fb95f69360108947d12d4ca1037aed8aa43f67f +size 716172 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b92e9fed92..745c3015df 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:874c0e933ef44c08fabfc63ee1c317eba4c59034919ea0f097a12b5ceaac7804 -size 628104 +oid sha256:c0d65f13b9e8b8d52da54f7c6d929c59442f5fc4001d689a2028e74b61bd5096 +size 628202 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index cab5dfc87b..ebf182bcea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:94907313a001bfab4a5aac9d13358ad1d8c25b78eebceaad9be4b0e71b833fa0 -size 966708 +oid sha256:475556a31f2cd31cdcfc81fd2125eb694231fcc4d0515f516bd55e6a62a427dc +size 966808 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index dc3677315e..f35b21b35e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d8f39ae66ab12d1d0c64576ca1ae882191b7772eabd78dd6cb86e87aed746bd2 -size 914808 +oid sha256:d47192f811c475b55a9896f28b4ab29fc1cfc70539436cd184fb419bc63d3bb3 +size 915696 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index a5a06dd82b..eb196ec1b1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:acdfed9fc78406cc5664b12df3a8a8c925b067a2deea4bccddf3b23cb69d9244 -size 881904 +oid sha256:bc9cfed141d4c623c326fb9257b1f3f4558ccbee742785336547cc71b3ec906e +size 882004 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 207f120824..703b1e1b4e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2fb4bb9cd3bfc39958452dfeac61b6d21d387982c6128ad04e81106291c43cb9 -size 916190 +oid sha256:d1a3131b3c37893cbe462e932826ef57a82bb6fe4716b7f718bbec717c104e5e +size 916288 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index c704b288c8..8c8edb363f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fd43065cf42c8dbebea5f65bf4701f35ff51a89566cc6b67bfbac2fab4850dae -size 866064 +oid sha256:f2578ea41a775a47588c262bba4c982dab482d78c08b66e36b92e45e41a99985 +size 866164 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 84b76d8fc4..6dfe65ceea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ce841f8412bafc4200f2b12f85514f01382982b314fb113e9e4953ff420bf609 -size 735688 +oid sha256:118fa877e6ef9f2183e3a16ff218f691fc6a09eaab774e29229655f6618ae251 +size 735788 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e5009f6af6..9dc080b240 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e46f7cc29ea005d86f27e6a7b4f23104535c2df6c2cea36c055c83b73d31deb -size 943466 +oid sha256:7164501913c211629432feaf4a2915530668041ad93e3b36ac7f4d6f2c7993c2 +size 943566 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index ef9202afe8..cbc416cb32 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6bfac24229844b8456ce5be659b03c4746fedc6c38d0537a73bbed21d640e542 -size 889988 +oid sha256:70b720f5e919b44584561d2f8640fab341bb18283b66a7aa30c3383173ab059d +size 890876 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 56c9ff9e26..4b95d363c2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae1026437febc439901ece13b0cbdb79c546d09abf6df089cb0b41e515646c6e -size 902320 +oid sha256:2ee9594703754dfcb70593b369a1c2b0e7230f6c78dad6d2bddd2adeaeaefea8 +size 902420 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 06c17b4bc8..61077ec4c7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3682437851032a262f13dc4dee00c7bd7834dab3adcaf3a734413afbbf9162b5 -size 851160 +oid sha256:3e7f168889c0dd7a7c371aa79289e864d8b54415a2aa59f6c8bdc82aaae227be +size 851258 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 07809d8610..56b012c1c3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02e2a553c3881d10835afc75d8da907212a20315ae7629d96c505251127fb5ea -size 955884 +oid sha256:3c3df46d5967729f62449f099af965034db091f3bca5523ccd648346fb8922c8 +size 956772 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0e39f7bde3..a6df238e33 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c210e5183e76a852428793cf34e50ed53e0e3a105e2895ba48fb520a52273513 -size 860170 +oid sha256:0a8d41116fb5d9998562590e63a58fbdb434c29b63c4612fa1070057f77d2290 +size 860268 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index f464e2dc52..d6e815f7cc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cf286ff9ae9d8dc8307dfd5f28669c7d4d35f513f1c7c500509c764f1c1f2d30 -size 908474 +oid sha256:a60555bd5a461a6e0a7e659a2e3fdc475757b929014e21021a2cbbe742d61180 +size 908572 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 1476eac682..6de2bb7312 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0aac9ab6e272e06886d909ed9895d7e63789bdacff41ed503ffe7f9536af163a -size 807924 +oid sha256:d6bd6496e35e86150f66854ccf1679f105beb57d94cf73b8bd49a833e00dbef2 +size 808022 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index fa3faebcb9..fcd47db079 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3fc46bac4974e6eed0cead41a67b33523c86c36bb69da0d0783a7e26d4787424 -size 791112 +oid sha256:0efdeb9df90338f6ed3e7f20a26b4a6b344f20004910eab5a88cf3e090bdde6f +size 791210 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index f30a8b789a..d90d662466 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:210d171eca58d0b651dc78fb517aa58d278fe420f0e9d7b96037647876308731 -size 700478 +oid sha256:93eca4d5aa1f1ab377a6bc5da828680370506a02ec228a7650b78d4d0c0c5b4c +size 700576 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 2736306dd1..8cb9c858e3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e907cdac706df70815ebccfc0fa5f2be8353a002c05f978c6354c18034eeb98a -size 904428 +oid sha256:4f33ef804f69abdca7b587055f9d5c302df91e990c916b0c16bc920887d87ffb +size 904526 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 36bc43a487..3cca44f20c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cc1a33e53cc5cb4dbf46342286088b2f727da55402b688158774b10dd6bf5a26 -size 816064 +oid sha256:918facbdf808f09dddaaceb88187ae8f6c1740298ff0f3e7bcd0f669146e0394 +size 816162 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index 9f1543b714..8380121f80 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d99d4ed0cb157bb9aa734a91b2fe26532a9f80e3f334b1373f8cad7f4014e171 -size 858744 +oid sha256:e89f0373b8b4e3cb94802c3bb8a35bf8fdad48716c89a26016ea7eb25b4cead0 +size 859632 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 66027b407c..a4bad1eb81 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0dd05208459cc3b3cab9666fe9f5743f2f536b2401b134b5a14cbe010834b374 -size 765594 +oid sha256:1cf9c500f58fe7ef80acbe386c2578f6a618f9af43fa1d6f59bde53a937bcf55 +size 765692 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 73f3518d4a..793d028244 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:121c4abc4fc1254f3602ca3cb5cd265b9799d59dfc188dbe0f4e2b5c3d156b7b -size 936516 +oid sha256:d26d8399cf8d18a2abff2ed57cf190e337752bad80ce394ec92b27821e5782c9 +size 936616 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 608ff44e91..b1bd5ba2b2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef43506ca60911e7a5bbd8c9fc8f8233d23d44d916dfb3383d9081fe43557e8c -size 899366 +oid sha256:dc406d2d47445257e3c5cdd7f4e9a6b0171d8f56b8560b132bbc20b61e943988 +size 899466 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 5453366fbe..98a158c347 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9017478e7d1560a5eb0ef59ec14f42593e93f01f3984e156ae64ac2e577f69c9 -size 851712 +oid sha256:a28dbbc2527c8328c47476c9861a7deb028b726b71844ef897acdf3acd93ed4c +size 851812 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 59e576db48..f888a1ea87 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d393d0f2781df0027b04b492f8f7ff0411c8a3399e5434c8edfe7e1c08737bf4 -size 885208 +oid sha256:f65143cb4bf440eb18434fe7a0deb685dd9a9960e6fb0c992a2700b82153762e +size 886096 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index a1d03ee724..0963e03f16 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8695460fdfbc7e358a6c9de94d2c5b706b83861d98b64ff7415c5a7892706999 -size 849784 +oid sha256:47a9256e1faac0054d4579c57d0e9eaedd228cfbd33818abd6fb54c705cf1124 +size 849884 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 0c6096040b..a632e04d3d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:11f04bf81448d33f5dda8e1b1d7044cdee5df8bf5656180e26d57be8d1d29d40 -size 705496 +oid sha256:98ea59350cc13aba381efa6770aaca8925ec14d38fbbad0a5c270fc0f14cb40e +size 705596 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a8c91e4a6c..98c2be6df1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e2e172b68bcce459380a76d8921ce4031806dba8843aaea572b8253a66c43af4 -size 913274 +oid sha256:bfc41f00c0743acbb444cab05a452be7007b83fc77b778a81cf2f399715ffb67 +size 913374 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index a3fe1ddd7c..4ac0cb9dd3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43f3fbd7428f79820729a5c11ad73d32222a9562fe8c14d42ccf09047e824b7b -size 874546 +oid sha256:16284c25fb95f50c0738afffeafb715da11d432d5a67e7f84b772c75d3e4feec +size 874644 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d29dd6e7c1..0bea25f5f7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8b104f55ca6bb9842f4c60440138f55f1700bbd8643edb75b5faaed8bec672d9 -size 871388 +oid sha256:d503bb8d2e98dfed4dcdded98b2c9ff7189110d6cd8a71abfc3a501d0b48ed75 +size 871488 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index e99e7a4409..85a6394ae6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b76238962292b4628f538038d694e3b4a9eaf29a5800d76e44ee7891fd41a4db -size 834930 +oid sha256:1bd47363f70edeb78d4b33eede0dd7ef0ccd05ccb92b6a9b79dcfe8cf1436449 +size 835028 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 2f93222669..7e5dd9cd8d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5c3511522a0078c99c1528b59a3af09f3ba7f77a7b5008a40457e1ea1d04bf4a -size 926532 +oid sha256:0550c95f7d22a1b46b81986935f985d315012c5a5e97f80c5669c6b84131251a +size 926630 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9d984c888b..4547feeed8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89a48e39bdbe1fa99657cb205cb4ef907e5b73410a0c1427ba600e0128bca4c5 -size 829978 +oid sha256:c55fd1c7346151993508edde89cb71709ec76f3a96977297b08c7d4e254d12f7 +size 830076 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index 3a982b34d7..c0e2763e95 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:85a8ecc56fea5dbdc905e9860ed7ca3c8d67105dfc7cdbf81966d637552bd4c3 -size 894512 +oid sha256:0d762c305c955c88e3dcaf62557cf75dbb131f5474fc3e823c066a92df608fef +size 894610 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index faaf213473..37bbfffd89 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e123cc5507f5da5775db21e4fef48c91585210951d4b3f7bf0cf2558b3bbdcec -size 792482 +oid sha256:8e38260bd0faf1d4fc4373926b72aae74e808ea9daa89e0031a28b24b21a1e1f +size 792582 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index 2af03a525f..c76e981c78 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70226e504c7c1ce6fdf8686d18e44a842963008c0be491d285ad53cd25d5857d -size 761856 +oid sha256:f5b3c8e5ed4cc232dfd22cb35cea91eec3dab3322c8e7d979fe0b246a212f696 +size 761956 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 40bd301f90..e18bd687cd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5c7e1f4e723ce3262ea4458aa1aeae3e948b2558a9a9b21b36263103561b47a0 -size 670336 +oid sha256:b43c2132a706e2df8ee20443fe4a9471685f68a549ddfbbf1025d683fff25e15 +size 670434 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 695e4d58ce..70ab64876c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2324db4584c331c797a97135d9efd566d93194074e3efe2c91ed8cc0d15125f5 -size 875026 +oid sha256:4ea8dc7d9e6157850a555d449c73c5be382e65de876ec1a3be50792a674de2bd +size 875124 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e0e49eeb53..f862044752 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be5f7fbd7c093ecf40ebbb698edd79c472bde82cddf62557d5ac020bc4360b9e -size 785922 +oid sha256:ecf8efefcf600f6c88193a1628f1b012d467e30ebca95eb058199b3543c35ce4 +size 786020 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index 194ac96c83..5c57715b67 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8444e98919b393c81dc0b2d80f039ac550553cd2f96973f8692ce8df549622a6 -size 844782 +oid sha256:081f6e3751b3ab1b84b46bf0451b7ae35adab9567d2e2784bddd70e15a436cea +size 845670 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 50f723ac78..2cffa9d0ad 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bf016ff9076bb8f9700fd632f33c4610adf177ad557e0837dec82916d50f6462 -size 750152 +oid sha256:d6e762937dbee16e6e1978d99447ec1a6abd779498044a0ed216d0916decc967 +size 750252 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b4282c57d0..a5be096edc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3ca31d14d2b91cc312a6adafaccf0e94840db22887bfcee2c683b8a05efc5350 -size 829464 +oid sha256:cb3b8f71730a8837965a454b6f6ac4095fe1f48597f38f838a04ebed8988e9e3 +size 829562 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 259ede1b09..bd707199c2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f126f325b544d23870eaf634f0afe79dbc0a042b1985addfee5175adb7e322a7 -size 792462 +oid sha256:dc39ac8e4e61f2b1a1b110ffbd4b0b2cedacac201a40dd535d72e01de5a4f7d7 +size 792560 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3218baff92..8544d4c6cf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:298b80109b5eb44ff433b28766eb057af740367f03a7b268691747fbd8cbc553 -size 805530 +oid sha256:813720f3ce9d5cd618a49fde609856d73f096d5fc0acd99288553cc3acaa637c +size 805630 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 693919d1eb..2286c953ab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a0ea203c8880a3668a48398e239757cb62cc52bcddf3ba71466421a8fb14b83d -size 777952 +oid sha256:afe2ee1be5643090ea84e6389149eac162515dd59d07812af6bb5fac9fbefc88 +size 778050 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index aec7cbd410..f5fb11d4fa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7c14d3ab2832bfccd6ff4f6fbd99cdad4c8515aa33c0a75e4ddfed4aaa39046d -size 821008 +oid sha256:8109309c9a9c09673b9ea71f5892c78d38510adde1d94bd246d948173c0a20e0 +size 821106 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 805342d5ef..8f25b6ff4b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:99ebd8e3d46f2395bd000340df38e2c4830c989a4226e959d5de61d04cd596df -size 726280 +oid sha256:b445f658dddf8f93fa1df3978edaf140bfdbd31e0a156003391d7e68d696bdd1 +size 726378 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index a472d0047e..997e3f7618 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d15d97177eed3dd4cbabc4e2a9ff678d40b6497711f0890ac731c6d541a72a3c -size 788446 +oid sha256:54762fc805a5f1ed335a01996bd4641be30f31ae789c49198acb75283c2830b0 +size 788544 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7e8e7d8d8d..b3fc78a0fa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e79504ff7f525bc7c3c85f55b1543b3526104915027969cca0dfc438b3c27275 -size 697368 +oid sha256:3c3e0daa5d6ec17c2442fb5c9761da2dc1a7e3f2e4175091f546728ad7b93695 +size 697466 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..3f16d7d402 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:013f6b2374599d678a14db0f8b8cfa09a1ec22da59123160fb4e7bd0c6c5c7fc +size 660670 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..c1f6a0e6a1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3764a892a39d7812c701f3b95e43a40f893be9cd3b06d261cb0e7602b4fa58f +size 564017 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..97032090ed --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9feedd9d145c966399703d48a432f42dc3fbaee61647790a43999d1edf8c5154 +size 684028 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e5aa6fde87 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63500731eacacb48edbf031f7368da2002f5c7266ee5d7ba6c4f9ec304677c54 +size 585105 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ffa5600d15 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc986fab66b70b81a64166ea8e9453d0b81927d5628b4f1b958eb16b3ddbb728 +size 658644 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a5d580d046 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27c9c94478b1df2515a6b38cb4111cf41b492dcfbdcd91a821a339a1dc7bc6d4 +size 564359 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..027c1a3220 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18217837892e97249a4abd4d43009686e47210603a7ed6825396402b461b48d7 +size 680474 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..da87424aaf --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd6829c9414784832c80795d7f8dd18e315b6d983fa5b97c66e7c99a6860d6ed +size 585893 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..83550bca8a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d061c4ff218393ead1fe94d29cf23e04c5952012a5776d7921e21e015d2e1bdd +size 728140 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..dac593bbf3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30f8bce2be539d62c3ecfe80104dfbf58318d9501a11e5f3bffb09c39e654555 +size 630206 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..8323518ea1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:306b0f8b66f843ab3b501ab4625704a368997fe90dcee22ed3bf90f0a9a7e75c +size 751302 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7f3c5e9dad --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1cdabdaaf64b8d3b7e6bff93c40e117b075070ffedf615f367dc864f80d1366 +size 654058 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ed839e9447 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c755dcf5183a21622d1f8c26745bf4bdff2b7c98db31895f7355dcd91018c74 +size 747152 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b2624f993f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb3e1aa62565abf60abda6133d2629d83598ab576a1103b8971ad2d2e9eb6d6a +size 644828 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..38213d5576 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13fbe17d4e1ea5b9b33989fc46b6797e51766276036c6f4b15d57f6c8551a18a +size 769328 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..514983a954 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4be7e24e67e90c905946ad4de5585373fdd527771db8dc9c96be66f9b315d7d +size 665424 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..3383b29c20 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4fdf46e444476b158d7fbc7896ec3956df8d391acf65c1797b4578200f32b4b +size 740000 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d7fa71b86d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bdb87a03da100c717e3d918f0c35841868d6eb9b311a6cf892ffa9d02b5ca34 +size 637674 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..7f2ac9a839 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8053104832ee102d8a68e947f675c4390d41dd51a50035f9a76602453ea90b9c +size 762174 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7a786a187f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0231d4aa665afafdd199d07d440fd7775a378a933b558de4ea746a53ca88931 +size 658270 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4fee077da2..8789f14470 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:361010c6a998251c70e96880004b4a3699de93eca198cb14a2977a09cb498de1 -size 636872 +oid sha256:e671785c794ac4ef0d032cb24ff36445a440bbdd9a85fa86f3ca7a3ef5bec012 +size 636970 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3bfb4666b3..2dc7cc97a7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c6d705017689af57910b76aa09f7e934b70abea1b3f058047d0ac8e9d85c69d6 -size 593899 +oid sha256:ea3cf8c537abe873e670af3bdf0e0f3d8011cf74b3eeed12060545a1e2d71145 +size 594787 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index f6d0b3020e..85f5e2197b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9653394306ed5426c94a7229503f148469dac6fc353baf2f9db4dcce694c597a -size 558307 +oid sha256:606948b8804f6d3620c7cff04719983d42c9a0fb4247d1f104b51f365f1dbc27 +size 558405 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 515453d07b..a508d5af26 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27a405a7e2e74907320842749f3096bd539cce12da4b3e2b51571ca6c3624257 -size 520269 +oid sha256:405cf8c77d935bfb575d6070972ddee9fb0f5c0fb33ceb217c1ec06bcbb88779 +size 520367 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4da873354e..f5987b44ad 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6261509c207624e107e54230c0c689f7a04876f02c155b097571043cf1e7b55b -size 626950 +oid sha256:2890803197fef650ca23f72be6c7458411ab0c78d548a86008d123bb66c67e8d +size 627048 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1595fa4953..e3ca7ba1fc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed7364cd126e85f25368e5ca7c4e69cdcc686cfdc5ea96728502dd9ae3d7700f -size 588565 +oid sha256:2c66e559351dc7c74d898893d05d8bbab23144f484d608bcba7a4a8b5d2eb0e5 +size 589453 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 37de5ef6fa..6a2d0c09bc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3f0e8e9f40245974950400282ab7578f4df0810b78279360a7a6472b5efd0b32 -size 544833 +oid sha256:d39a604bbc9a85cb199188ebdbec42e9399619846d2bf8cc53cc462c2113fbcc +size 544931 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1cd0f74baa..718fd9a1cc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:221fc2431a655dd0a26a06f9d2dd333e83a5af56e482e1c98a7c4a5ba967d4e2 -size 512221 +oid sha256:6216b1ceef92a7ac6b1b22f7b5e3cffd117218bbefd2970f46b1b221fa1aae35 +size 512319 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a246b6b262 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34a902d1020b48c7b7084bc36dcca03945dce2e030bc94a815c7848fb96898c1 +size 740392 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7ecc359a73 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0d1771904a8a93e0ffe4a9edf89726623aab2eff9b7bb0ca58af6dfe2cc0f7c +size 644678 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 9e3d4dae40..f7fd278eb3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff7b0eb6df46aac287f579576f60c0965301a86ee0db539405156f62121473ea -size 642376 +oid sha256:7d9dfc61c06776519ad6cf7c931589415c6a8d977f599e23517b9258d7e1415b +size 642476 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index fd5a858fb8..77ce4b28ce 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:285ecfd8d048f0b30b6bc70b79df77cd462097dd8bde792961816679c5bb517a -size 544539 +oid sha256:223dfed5249b5a1f41a1503b98973ef433b1308a90ae99ee040ec61308af74d5 +size 544639 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index e655e45be5..991b079894 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a669323d5bc9e19f67319e0ffaa2b87338bfc0f51137da440b43bfcf18309ff4 -size 587811 +oid sha256:e7c22ca841058dc49035e5996b11547f6191376d7a464420ca5c4014b0018b8f +size 587909 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8259faac41..198e190e9c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7e275c4a2105a3d88e8ece19a0af5c088d263320b86fa1b763bbd2ef3fa1e201 -size 504725 +oid sha256:400610ea9f3088b91bdf84881f8c7cf3b49fc487f9068a0bf5a868dc4292f8fe +size 504825 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..0a82c387c6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8f75ff5ec8aa1bd59e9b7475abebbe834ddb10dc119b61b615ccec270a5d6b5 +size 763454 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b11dbae31b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b1947f0231ef8795410a540357547c59e7c1b3d045ad57e70a7be60c4db40bd +size 666112 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index a6dc574475..e6190b2e82 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bdf533da2ebc3c330c5c785851668665952d0e52b1cf790871694409ed173f8e -size 576243 +oid sha256:fc231cd564ea06a37a869a1619e774ed9825b27df42005f5600e36a3db408cc0 +size 576343 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index cd5bbb6931..f7b13d44ca 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0c293d6adda5c5d63dfcfda025e4c14cef629aaf88e5b5213a046ae222fed4e8 -size 469775 +oid sha256:db88928428f402c2c76c79be80d0400a15cd2b5d3b27602e9908a0a8c37d0188 +size 469873 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 9d33da5c6f..7a0e0b704d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:81cdc2fa1d4c6b0edda86979fdad511ea6938b4373447c9e6e818370b70ad77d -size 537909 +oid sha256:a665cd12784684ec139cbd88afecf800c6ca73d227e063e944488c8e870041cd +size 538009 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 11e1b4a47e..6982b52a27 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:17a20152562ef5900295268e62be0bb51d79c6a1f94d18316bccd1a10428b30a -size 435535 +oid sha256:7f65663020e89198ca33f3222d962b0204f5b5963750a8e38a638a2648005ffb +size 435633 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d561ca3bf4..d34eea58cb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c23aa8902912772cc002be18d3f7d4804021fefe236e5c7ad131bede8c758147 -size 629718 +oid sha256:a457d091fde08e2f7b0ab3980dbdf9f637959a133862cd3388763f568bbed74d +size 629816 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1ae106d305..ac22563608 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4095fb552480ec69ac4632ad4ba922c12cabd9e981b35bda7740e2fc3aab548a -size 586745 +oid sha256:af4605a374ac1627e2814001f8a119a48ba58fd906e520dab353654fd3543477 +size 586845 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8a96d12cbf..ca45bf4bd7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b33d346bbc1ccc962624a207586efe71f28d5e3e0cc01be337f9db8fe7992a9c -size 554311 +oid sha256:2fe39d69ba194f94c954d03898ab0d4ade727ceb7b6f898d8f7c4759da12d9b5 +size 554409 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index daa1393bb9..c1efc1fd33 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e524541f61443169b86616e8a78665d17d2adaafc6f0d4ecf24d18862c02396b -size 516321 +oid sha256:5ef077f14f3a210b16de0075b2328ada60d487b665de3676b3b58f29f06c5081 +size 516421 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 322e52f965..c60ee73caa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:78a88a94a9b4d4898e4370f85786ce465fe9b6309702c3d9d47f3e681b63f822 -size 619796 +oid sha256:4b5de01597253aa4ff250e1a288c2f524a69195d7e0c772e6f949a657f3c14e9 +size 619894 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 22677b20e5..e97d9facfe 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9eaa85e2486cb2667a66e3eb2f1d267c427fd2c39441973e75f848f2ac2779a6 -size 582003 +oid sha256:029fb6edf68284570ba16d5474595475211ca029dac1a9586b2c740190d0ff23 +size 582103 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9b34055ad5..31ccb72e97 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df6c7d52a8fbfa9896e00bdcfd575877ada955a7cf16240e6bdd92d2a56821f1 -size 541625 +oid sha256:35d105f761400aada877490edd67df27d9f2c87cd002ccbc07f7b2b73f78fa36 +size 541725 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 30a4830c31..7f24396f68 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a2497da59584f0484a2e573ec314f4eade4a87b18c19afc94e48bacfce2d4958 -size 508225 +oid sha256:ba0826f4d4818633acebd5dafbfe44e2ac863575a5c81f4cd6bd28a1e84bd45f +size 509113 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a2031da655 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f093601b02e7dcdd13a96018c9ab53ec01a62743f0a588cccc7c861d1351e72 +size 733140 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a8cd19ca43 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:238b744f51a2b6d10d214b3f2222dd5c92179636629064ad4228527e6ba7023f +size 637524 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 1c8a380858..20c76abf89 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4d884ead10d341f88450eb0705bee5da45dc77573e7276c27fb50c5f566d4c77 -size 635224 +oid sha256:9f09060000fb26510411d05805fa66c1fa5ce26b7576a3b9932cf8d9d9853573 +size 635372 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ae84bb20c7..a669e702d0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a406ec1dcc07ddb54695398ece57e93ff4efa6471d4786a75efffd88486492ac -size 537387 +oid sha256:e8fb78a2eb272006472c2949093bb727320f780086b6cb75f676e4f179be4c0e +size 537485 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 36d039c710..57ff70044c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b23b9d57eefdb6b51579f1cd08ed0881c3dfce9e30227ce5bb358daad525ceb6 -size 580559 +oid sha256:8255786eee545cf32bdf0306f05877e13531af9146a7772730c903be510c704a +size 580657 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 235779de74..63a43e2a15 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6ab27ac2417b34cb72c342b9453193f596ef619352ded2d15ed4e2ef9dc0c4a4 -size 497573 +oid sha256:9cfb8c3d63aa010a443fb7500d355b0a926174447cdfe749838792a023320e84 +size 497671 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..8e828d373c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a3f1d9a8b7d3a2ba3f3ac3e17351115b862683edc1b741e0936b1f8e9c14309 +size 755610 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e5911dd3d8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:359d5bf210efc21fdec33f9ceda84634eeee7904597f3d6aa540559c581c662b +size 658958 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 859b75842e..e5ca3bdd28 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bf941fb64d31f9eb4a7415d8e568c736a84e2b2559481ad6f2bab4dad6ab1d41 -size 572247 +oid sha256:8ad6af127b6458a59fc7f2804ba8f7027d8541514ff1acc76675889b898bb843 +size 573135 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1a0bf6f828..596ebe0deb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25b1e272e5364cb4ad5b658e2f1be13eee9102bbc3425aa6d5aefc36db802474 -size 466617 +oid sha256:ebf16a97d54a53d89f1e3d532815ca5f9bfffe27ebbd09ee0ee31564a5bcaf94 +size 466715 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index c89955b6a0..e6502d98c4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:005781d0d275e7a044a5e9949b6f915a649a424447b151c5675b747249e7f9f9 -size 533913 +oid sha256:28c4dc62186708b25b75883af7ac6c3393f2c0a36ff76e7dcb9e5e3bbdf8d473 +size 534013 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 86d234fc05..65fa278055 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9c2e9de2e184a996fc21330706865d188baf7a27fffb0b6c610b4469da47351c -size 432329 +oid sha256:bfe72f368dbe39d653ad3d04c1681db1d2fac74f2983819b33f1ec00a0513cbf +size 432427 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ac9edc41cc..8640f5bbf1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:016175c25ff159aec182965704b830f22eee8295d84c798fdac13f66e3455ccb -size 655554 +oid sha256:21b98f04ab7d500d328c0a57cbb1910cd70f1411a9a30b9af88d76e535b99c44 +size 655652 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4ef8495e0e..3f01601408 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c317a3265c99747210e5de10b9ac918e5cff41934af704636cf1f2bf30d35fc2 -size 614159 +oid sha256:cfe1063a3c829443eae6e685f8879856c9d38a283e4f83c64bbd531a2096d064 +size 614259 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3735a27bbf..0b05704131 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6afb414b81fb09f27a43278414a2c5e3cc0f47711b6ae121508152ea39ff39ed -size 579011 +oid sha256:b4c62c2dac983840c5595fa43de71d1ef80fb13e0c8a3736b130e8b24431147e +size 579109 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index f02596e1ca..ad44f07942 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:960e05281b0c9c90f667053286864881f00118fd6fb892a0a8f8c2778c556b0b -size 540875 +oid sha256:71f9840aa6de043100974b10dbd234d15416bdb24de191120771e7f90d3992e5 +size 540973 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f1c471c0a8..529286003f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:baeea719ba626aecae898af4461f35e70d09b277495af843db408148690d6f05 -size 645680 +oid sha256:92c77383808c195a9d2665d2945734e3a71b014b3acc5c9f9275b703f38d922b +size 645780 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c4676f0b79..2ebb241a06 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f666795503e9063738fcfefe481d511709a9d44dc63c7d9c4495cce37024b041 -size 607247 +oid sha256:51de49503b40ab26ba8df52e564e2c01d2ffc86d7f61a7a0d3ccaa1d95dba21a +size 607345 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9e02519794..0ef00ae95b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:31e97444e4d11398f256acf8eb9519a0af990951f7309a62d13d2cea9fab49c2 -size 566327 +oid sha256:67d1c2f91bee432d4fd3b2255ecea76df209b2be0877c30be94d96a388b059a0 +size 567215 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1bcdd893f0..b511dac4d0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7bea7478d3c5e6ba4d15ee2fc49e681df2b2693b0d209e7bf735b4a207f4048a -size 532087 +oid sha256:691a4b7d87e9a1d95ba992d22e422e0ed48f2d1207008c2a9dc8dbb66e8d4721 +size 532185 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..e565223e3a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7890bd77c6e857e8098936bc335a13596a7b2049eb57e041075a89490e3ee807 +size 815364 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..88667dbdd0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6ea467b20f253a115ddd477c9ad1d84222f32d93cfb4726cec329c51ce83fed +size 711164 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 075a228661..1e3c28fbf1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2aab98910d7f20682123241af0d2565986c38386b11066bfda1b6eb693f1a56f -size 660368 +oid sha256:2af77d018a5c70c0e2f6db4deaac8c87fb87f6080661ee21db7ddb3f9a385b09 +size 661256 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3d5c3f6a34..74963d9841 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3ce859eb3a1949a0fc5c03d0d1b40693322cfbf87855199a636e2911615e312 -size 556217 +oid sha256:d7b7ebae72c094421803b58a09a7aef6f84defb2a4012847fa154e9d6d595024 +size 556315 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index b3fe100318..e1a937fc21 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0da24562f453a005f7902a42d4bff7135670ee9d6f03f4097c1650f1a494d814 -size 613499 +oid sha256:5cd8be7ed5d8f22bd57539603250509f0100c0d648b153dc78bd1520c104364f +size 613597 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1d878788cf..e3a3d62125 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:91c3d35c928576d2d2e63b4e0d92938e16d5b9c5d68332a222dfcd100be684f3 -size 516353 +oid sha256:31bc71075a97b96b503033190b08c2abbb87d79632f7bd899ab2a7ad810363a5 +size 516451 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..48b531e4b4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e42e4359c13eff21caf70f16b36d07206273426d010214a7df644382461766c +size 838870 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a7bacdb469 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09db80273d580f171b5d48d17fe3e9b4b9f93686dcce025014ab53f7dd88a672 +size 732500 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index fa9147f7e2..ae26bbc46a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c192e2cba8d0aa16118070c36273d2fac9a96138813e780b0aff72e4e2c84baa -size 596553 +oid sha256:bb369d1d3b4f7dbbda3720a1d2580da1b2e7c9f39fd726a89c4c8ae1d929723d +size 596653 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index b2cc67421b..6f9d146f87 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d885bc791337a65a439ebb4a402c441f54187a72213efab64ef57c20fc270d1 -size 488753 +oid sha256:88ce48dfa28cf2479295b644cbd5fb9f6f740bf876c3179187f3d4d62ce459b8 +size 488851 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 43bea0024a..c6fd64a459 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:37ae0a7dfdf9a4bbb21dc5c19a9df7e06da5a1f1d426b0150a5d0cdd6a4e74b1 -size 556887 +oid sha256:51212ae7f95737dd0336128d172b9ed1b3942b24a8a2cfeb2b47eee88a448195 +size 556987 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4d49a032f9..ffe6ad4b98 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:74831c715e9e0be2cc2e6f4392d415c8e6d13e3fafb1297cf2d141df50f260df -size 452885 +oid sha256:78cd8a7ef99551225a453eb090922846c12535055f9d2170b1f0342772c0213d +size 453773 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 53fde08c05..e0f8ddf99c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:973860d917be3bd5132c046a854b686bf789eb20c443bb61fa15bac645ce18ff -size 648400 +oid sha256:ef066afcd5f1a5360dc99074c5d78dfba71480dc37fb671e87cf60f21260eea3 +size 648498 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 46a9b55b0d..e09bfa82b4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f0c51360cdc764b05792da660d181a86a1012031b26b7f6f2e348226df7adc9f -size 607055 +oid sha256:a34105b6963c983da696ee3c98ed2dd842e3ca11f000cccd8eff410930f4d717 +size 607155 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index fa3a62b4a1..f574bd625b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:280019b9154cf2bcad484c750ea594df2e2cdfe34ae68f0f2fb1525f2cc853ac -size 575805 +oid sha256:2275a6d0fdb93604d91679d18b905eab2c9da1444ff335ece4560fe4479ddda0 +size 575903 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9a824fa6a8..6935fe073a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46070099ef60854697b9db0d54c239aa7feaa75699b015d97cd6b6a2f1090a87 -size 536927 +oid sha256:918394b10a5fd5bf47612131369bc50bdb20ecd42a6566f7540d0ab9a34a40b3 +size 537027 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index eb3316fcf6..63fd80297f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:48bb5bf3813d1d508d734e680ed4f78543c17dd2a41cedb824b19297ca16b12f -size 638528 +oid sha256:e67f8194d652feb948653c841d69a12f5111dd5fddccc9f0b8a837ddaeb82c25 +size 638626 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d427bd636b..3817b9b9b3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:17ee71e2168d85f17ca4c420bee08672cb03f6266276a88a7cbef2e3cc346b13 -size 600093 +oid sha256:2b25102857862646eb34c79b3f1bbd456098884b68b901c4e21d6b43e8b59e88 +size 600193 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 29f5fadb49..1e2a86694a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e0925ecc10582b2b7beac4a6f00a2d10c015afce907844548de4b57d7ae09cb -size 563119 +oid sha256:a239b24209abc36bc339ddf0b7cf55c4f04d5180b0fc0b8ceb79dc81145fa61d +size 563219 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index b4fc127996..5ac31d97bb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:96ad4dd2630f4ca1f50816c0decfd3c66c26660e8a90b9951df713d11ed160ef -size 528929 +oid sha256:6ca5bf83fbc73fcd8454d55629da6340d458500555208618675868feefa36565 +size 529029 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..dd2a7b1109 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3978f8e5d6164c01bb237de43f60384ff8b163d03838684ed7071594c1d068c +size 808260 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ee745d8f25 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2847da97579e93712e82a7c5bc18d2b90ad2e780d6c8fd4987fb65ab4063ffa5 +size 704010 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index c764ee6af8..b07388a8de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:19cd27fd0c4a353ce673c042a30ba3ea7cfe3bb15350badd651e5df26e2a88ce -size 653214 +oid sha256:36944b5a7da8545c19ca4fdf163704bf7d0c3d3aaebeb92d4aeac3bcd2e73218 +size 653314 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 10273ecb4b..25a7e5556d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0aef617f628e06df85f674f3ab145cd273dfb6d787420110492042153ede75dc -size 549063 +oid sha256:fc00f5708d43b7c998fa3896d9338be47892dbc8ba5109d05235d51cb88ad288 +size 549161 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 86d0f410b7..a5c45d6483 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:87a22b003be335d23e9c2dcca57bc60bc24105ecd0edba74437e6c620dd96720 -size 606345 +oid sha256:b8b47e811000adeb21c0c925eda2f189cf681523d5bcbafcb117488fba9bd889 +size 606443 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3d2d31b1e5..75113192c3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be73c99185da0f78bce1999a4811224becde9bcc201b9ebcab0007715b076bce -size 509249 +oid sha256:6ef46968b790adde265f9c919b95a240c9c6759c234db38bc68235cf8c33ab67 +size 509347 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..00d95b1b93 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f88337236cffd905f49167ba2279159331ed151408a5fa1b2937ecebfaac5e95 +size 831716 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ec03b4bfd7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0af9eaa4facea84340165a5990b46182af6767a589ec03f8134559933d365c52 +size 725346 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index dd59eb81f7..8944d8916b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8574ac541e42f4ccdff6e1c6320ba0010469a88e870d1389b2a04b28639c2473 -size 593347 +oid sha256:67ab727572cc0ae95e0ff2aa1545d6614b5f0b99b1435ade6ebd1dc48f72577d +size 593445 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index f454560b17..e7e96b9ddb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:da890121941360a4acc9bb1e887ecfeb49c626cf5ef66691b264eca41ce688fd -size 484757 +oid sha256:90596d89673c64f214c4c2f96eb72da8100165bda24dffbb00bc9c36791f7fb1 +size 484855 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 9ae33475e2..7156358836 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fefe93f02ac25b19231ba7ec0bad22ae9a4096982826b0cbde1ef2255ddccd2b -size 552743 +oid sha256:ef81c79db288968a27fefae730a369dd1bb5da441affa9303f6cf580d0e6a04e +size 552843 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7a258acc6c..72585c725f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58349112f76706f18e70bd4d4d3167a1f28dfc6b83efb10e6b7d4d7192ba95a7 -size 449727 +oid sha256:9999154dd21232a5d5abbd01682dbd499759390c45d9bcf8e45bc70a4f467fe4 +size 449827 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..db08f4f4c5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e28c41e9bec00f5f1502d76b047dc569db028842328158b63cc4b814f00b12 +size 622978 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1a3915c812 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3004e5075ca6c44c6b22c82f7eb6c1b6df343f89773791e7c470ada7c6e17914 +size 535945 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..54d6ad9448 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8bd6fdf6095e3236f9a18d3d37215f825ab9261cce8858530cb2538f60f3f28 +size 623174 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ed886aa144 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fe72a3caca1162fabf8f2801810d8c95de1f1088399c19bead410b88926fedd +size 538459 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c8dbcc8b06 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97fa5e92efada0b89a34aac14cb6549d5ab8231ad04ff18e2abed8acedb9e326 +size 685466 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..6dadc921db --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89735c8fb137bcf5b98280b02845c039a763e80fd062a8c392db4ad81487c94b +size 602429 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..2e5e7da897 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:997dd15dc491b064aa551a82c4c617e998d72897a65c71bf3837aae63788fdcb +size 710892 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..17fabddb5c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a23986160de838846122dba82b7cb1a21a665008f277bd742753ffb6ce352463 +size 625538 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..30185ca21d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b0f009094fbaf5111083de65fbbe435eb27748c1dff43cb3a9d0fb7270c0ed3 +size 697376 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..f34a64f283 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:290de9025235c4a3bfec1d61f487b0713514521e9fe64f1cf73068e0abf03a96 +size 612019 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b1b29946f6..ba53e95de5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8d7da7554b8530aae1412a0e272b7b4a1c103b669e9feb68189e53188a659376 -size 736180 +oid sha256:0766ee10bc3747f31a5fe26a0c9463f4453409517400bf82988b07567ee98a41 +size 736278 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ca7967be5c..08ff9d3940 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f265e02c0ad377fcb28a843ebb253372f6dd86dece13de775b2f7d404cb9c5f9 -size 696662 +oid sha256:664d07286cde46c6213dd6cc8b4c23bd8fbdd8bbb93970a569d09f6cecaeefce +size 696760 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c6892f499a..b69e10d0a3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a4a5ecd735257b4a3b7479b0ab8b454d9760a972651668ccef8c8054450e1b34 -size 719006 +oid sha256:17bf9e885698f4940f239717a5a3c33bf29f947d4116f0c9496e74d1547b7f6c +size 719104 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d19e86e2c1..3e3a6a90ee 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:325222cb64ad6600f5d49a864ed94527143a82c2d470d3098563331fa497771e -size 686788 +oid sha256:e2a8b95236e634bf3009b92061a68b0a49ad3cbb3a30d59973b551b26194495f +size 686888 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..32282bc1ce --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4208af2cc30284ca4017eceb168ff3f104ca9a3736938e51b27ba0025588fe30 +size 712322 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..312fbaea20 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2122a6efd734a658d4e8bb619a9a2757750e0a1a58a115786346df05b270d0e +size 628644 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index c76d60fc64..5b927beb52 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d63f90e756a37f8e6086dc4855a73ca1ab6a7bbf3ce8600458a786aca6274464 -size 752242 +oid sha256:894feb3d2313c99dfc4c03cdbbb1ddd86e3c028dd9725c43cea3626654ca933f +size 752340 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c9a2fcbe12..0ca2c58541 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f55bffeedab07a71a36564fdd786721a470dc5a94cfcb03edc8c638f91dfa1b8 -size 653764 +oid sha256:ac2d02433125769b75e019b3e0eac235881c3a8120326ab6e40a6e651c69ac29 +size 654652 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 13c8211bcc..3cb4fd2a50 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b28eed0f60cc6567ba70ff74d2ce79575e13308e60f7501e74d9b14d968f5702 -size 688402 +oid sha256:f2365aea4c238763e40c624a67b0e5902c4e5f894106c13fd78886e6b9902d6d +size 688502 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1175fd32a4..f0d6ac34fd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f21efaab2634d763d9a2af3588a6b4684819b32012aed1cbe8344457c3aaabd6 -size 606105 +oid sha256:5ad0a79eb170b9a4ade64958d0128c9da2b5e3da67939aaf7e3d988590a9f217 +size 606205 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 86cd387020..03d635197f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f01fb7e8390c4632c463780447c5184677d737a1797cd448ab16a92fa9641c78 -size 721872 +oid sha256:63169e78350a004fef2d6ced79b6cc1686a2bb6bea6b08213d72de7cd4e1a1b7 +size 721972 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9860d8c8dc..dc2677f7a0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:799073d822fa64e360e6926240b45f473c1248efbc3984a32a450b47eb974341 -size 682404 +oid sha256:2a1001e93a91a1212dba2ddbbf31ac4abe7d6dc5883ddbb13b1cb09d9e88ad93 +size 683292 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 273981b4a0..c149f5ba83 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1a8ac388f627f132c3d13d278e5365a7c93d36989c2a5b6fa09b997871629719 -size 705488 +oid sha256:42961692a01286e39435808aa33ddf6745f74e8874a877b8ccaefcbcdd0696f5 +size 705586 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 73df60075d..0a6446dd5b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cf9495224e8ee579f0b6a82771cbb03a5ff7b01882d590c4cb573bcbd68af122 -size 673320 +oid sha256:6ebd582cdc9b7c1cec22ec385e194d997b39b6e0569f8a8aadeb09655f60e334 +size 673420 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..e04bbf3bb4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89b948ed4661e002c7bf8928755eee4a8d705bcf9fd4e3911457405ce0d7f48f +size 698804 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..edc6b36e0e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73d5179709d38ad1c9933148ae50a0b0c825b5ca1eebae57dc8151cb725b56dc +size 615915 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index e5f3eeaf8f..06faef6b91 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:674755d4b8e14dbc4115db751b0773541c5d7bfdb5e315b4975b3799f071fcdf -size 738724 +oid sha256:968a9bcc098c4fedf0e31d48154e4bffd2a816cb6b58bd272217a59cb34b90fd +size 738824 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 93f0a2bf47..757f3d2d21 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d4508a4a86e5d9cc8c7e5dd9b95827ad7e38c67d4f1262244ec73add8645e9bc -size 640248 +oid sha256:04a989e00b15748254c2e3b5aabcdd34a5b40735d5dc63db4e685586eca6dc9e +size 640346 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 2c4c3233ec..325e4cc029 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:496a311c5b972eac15bb329bdfaf1850ebbbcdf6039a155a5efcd854207cccf6 -size 675034 +oid sha256:1256ce7c42a6f919284470b81d88ce46c18095220b102839bf6a62e8b4fb43d1 +size 675132 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ed2da7a60e..2a770e6be8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f5818bb735d242e2d4bb4b7c4b70751a057da34bebced31c6c8d5a98362462f -size 592589 +oid sha256:045d5ebab609faaf7fb5ad349c48ceba7d0b301b0b9d1df157665d473c174d86 +size 592687 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index de7caa15e8..0d48a02ed2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3274442c95afd373a0bb81c1de87471935778f7b98f323c089fceac33d5da56c -size 755058 +oid sha256:0ea739f25c9f53bc8e17a7e4d96cad25a4ad77668ea175576f676cf534575155 +size 755158 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index bba2f8b619..52ee5d40c1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b4561c45aef23041f2c10a9cbd3da9754f85d6e2f6d1f03cce70a2e495633b2 -size 716232 +oid sha256:6cf838a4ff31386670b8cb01fb841cfd003ceb313991e49eecb8236918b02395 +size 716330 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7fe5dc8351..cfd9fb8568 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e9e29c1fbc333c4813d79d5c9a88cfb6669a858053fa1005135b08878d41d136 -size 739464 +oid sha256:b83a3a6c7cd312cffd76e166206f1ee3f090f03826d6f3093ed97eb9523e3094 +size 739562 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d68baa0764..556d3fc372 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a820bac4ecac89e48fdcb84b8ae2e6e204c5c60897035e738caa0f2a6d8940ff -size 705570 +oid sha256:f81c3f918fc00f8c7d5b82e4fa7ff1ebb9596dc290b2291fa63e41bf7c980d87 +size 705668 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..70b3144699 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd21c42e01825f0b46f46276eefa5788c52139ffdfd0e6d8186c963d1ffb6dea +size 778018 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b8fbe41f08 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46915eaa0610b6d2afdc77f3435f866aca9df39f3092fcffa313db402764c31f +size 692120 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 6374f2a227..fa1a26161b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46185c7d749ee83e44f5caaa3e7d3fa6523ef76956c400a76fb99285261bbd89 -size 771022 +oid sha256:1b2ab3215224af8fed03b8926320914c94a13b6dccc87efc377acccabcde0ca7 +size 771122 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3c1114ffd8..bfbb408f4d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:82bbfce336306c8337f029a5ede34dec538661f5c3eeecf47a7ccf761541c39f -size 662038 +oid sha256:74fc4ae465d9c33046b5353a99cae6f8df41d1cbb532c90d7057982de22e3830 +size 662136 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 0b12dd40e6..13d5251abe 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a813c7c0a1ba363084fb4bc2f3308ee2995e10d6359056147a1dd75d3c917958 -size 714880 +oid sha256:fbc812822a5dc59bd3f933c8342de49896ee9af0b163db1f102ec220d58745e8 +size 714978 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d17e1a1a0b..44c32d4fe7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fc2e1a57540a4caf285c21831716d63d471430e1ea34ca2f306f212e303ef615 -size 613539 +oid sha256:a7bc19d357c637578e6bffd0780eb12c7edf75c671cd7f3634f90de41a0044bf +size 613687 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a6d06d36db..adda2efd44 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:73b62dd1d8fc39cb3a4dcd40dd2a544cf96dfadbe74d4709d23543987a481a81 -size 741542 +oid sha256:061d3467142b827301c7080992b1abcd93306d13771b3d1497bdd3975653bf2b +size 741640 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7102b3617c..c18c8135ae 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:241cd4cc8143679705c18d4cae753c37da3a8451481ff6f95eca74c4c0ef822a -size 701924 +oid sha256:6a314a78a589888b70fa62238a29d7b387789a83709f0f9241a30465a34887f1 +size 702024 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2e003978d6..a0eeabd637 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:80c13bc97bd2be027e03c728c3a6b510fc64112f26324b2c31978fb6310854a9 -size 725946 +oid sha256:aca1cf1177fd5387bac3c62b123857b6af56a30b84977fad043629b2f314c214 +size 726044 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 40306dbe56..6c41a1e368 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:60c76910eaf4a0dc7465192ac036927ded8c409e2c1704ac643dc46c99deb916 -size 692052 +oid sha256:6eb074a3ba0775792c47ef62383331e77c53f528f587d040898ae9dcc7a45737 +size 692150 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..5ec9a0d59d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347d133926d659082e1aa39507ae69b0ecf0619f2eba3cda6c8e83d6e3bd6d2d +size 764402 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..63d85aa263 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fe999c39690ef2ecaa6759a22fd3c671e00ae7f3cf8d28967609d1254708de5 +size 678604 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index b70f3a7841..4699b4fed8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1cd24917fb9da4f5aa48d907b7f9d5d82ee6bc7e06ad2fa85a52c1243449c995 -size 757506 +oid sha256:63ee8d80cd378c3484faeb62b70b823ac252838a639ba7985baa4cb2d84fdc22 +size 757604 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 186bf5b936..5993b27383 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:45206acb9d970afe6daa0060aa44fd63c2190a6092ca4f49fa915cc2903a2de0 -size 648520 +oid sha256:312702b134ff856faf2a2c64bb0bfa574b2e65b9063029264ab2cf12aebf333a +size 648618 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 50be83a16b..056f95bf19 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f2a41f3313fdb1e98b1efea2be312856dfaa164979db2eb794fb4add060fe610 -size 701412 +oid sha256:87290aed848cd4ba1dc06c082a8436aaa01b3f84055a6bab6d14721958fd50fb +size 701510 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 033566051c..d08024a62f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:41321f61b12e95c96833151f2204445c6388fbdc03d4d885c81173c8b5050444 -size 600071 +oid sha256:b266c1870c9c6b2c2a89340b30e9cc9dcb4151e5dd8ed364b22497a5b54e7451 +size 600959 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..2ff1babc3d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72e4a41cb5e9b0763c448ca89acdcb6f705a9f8329873d74ec1eafaa473d497b +size 662492 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..274a1b9ba4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adce3311f3234d8359d8684f0b26595ad9f4ed94c8e80b2b1d1d688a72722821 +size 529727 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..0e5533da58 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e99956ed56ab91443c51cd995a56a4feada60594409381172b0f43db3ff6f145 +size 687480 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..2e4d3ef93d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41c6c29c96b04dd7c950c4cdc6f7f12267602e826c316a5f61d4b2b4f79cd36f +size 552099 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..d0ee360342 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d3344a893e7660a26d7b3c05433f7ac41970f1d27d2a4a5d0e12a853118709 +size 669002 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5f31747f99 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df88b15569a9f0ea95e325434403358417c9c7cb6528455b8abad1fe5f651751 +size 530169 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..648d32e796 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59e2075112f4e20adec349c3ba77816c9fc2a994044d7d963edb10cb002e6897 +size 694186 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0052eec81a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fefcbf7202c5f947a1420c9c9727753a141e64500fa5a02245699c949f778ce +size 553183 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..af37d3e718 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03900dc1a5dfb05f0ef84b8b910a69baaa005312cd5396b72b727321eff2c6fb +size 730704 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..3b5cb4fa79 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57829fd8c8d72457fe3fe9809315497c8242ed0229ccb4e76d58543daf3a23c4 +size 595127 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..23a8377546 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e26fd778d8f8d51f11d43501defa963310d3fcfc6d48f2219d55a6352062d074 +size 754752 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a656ec6e46 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78656e01c92769060d5747569931829a55b406a34b2871f09c4c905ad208fef9 +size 621052 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..808383c657 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6bb09a5aab7b2775b10aafe1d9763aace3a672c070a450ebe15437e6a83e690 +size 749618 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a5f1254495 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c849c7f4ac798900144f05c97ca903bc82f92767b2730c39b28b3b8aba8199e6 +size 610389 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..4b3b1b32bc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a050c13b03be11ba3bf8fa2f803ef92ed350574da8de522ec20e2820d7e98cf +size 773420 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..010a763190 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26f4eeb06df4f96997b1bf40e24e2572cd3de3f4c9b86957b57d401cbb598c90 +size 632614 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..71e2bf9848 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6e395dbad91645434b3cda9b85e048db09fbbab1c593a0346561f9b0191b74 +size 742464 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..bc9a9be0d6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43d88523342816d85dbc11ee27a250a8e51ff898294288da4e1ca6b8c071f224 +size 603285 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a4e5520569 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e66b43c5febe362ea539bd64e76e3984690622620e140f7ca622954dfb67c066 +size 766266 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..fe46adfef9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f450a023f7d953240c575c5a7416186608e84dd6cc7977187523d98ac7d15563 +size 625462 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index dce1d1e0d0..55b1dada55 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89f285c470f2993a0a8faa22857598ae29eea8b69ffa07fd2c88b81f0e663cf3 -size 616395 +oid sha256:5e58783cda02179a6e628dc0f571dd0a01baeaa424552e8154de6757cb114017 +size 616493 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0b769f32db..a75b812fd6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb6956bb7d0aa03a50066c3c9be7c791d540eab8fc60bcd8f5c416f13cecc28b -size 593897 +oid sha256:5b72fbb01120420d6726db56ae6800d8dafa91b22f902110b78c448ef9298b05 +size 594785 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index e5669fc345..cdbbbfa2bb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25cfafcccd3e88ac4ee9158d7cded35c606095dc56f97f2a63ed1cb7e2a12d15 -size 545773 +oid sha256:f064bb07c50beb3c5050d339b6e1e336499f00b820b41925122e35d8676db06b +size 546661 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4c95275781..e46fbf1fdf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6b31b9f94337ecb7567e89a55d64635419ec03f04d6c67bd236512dae45c27ac -size 526285 +oid sha256:bcbcb0ff0eddb745ad63284c2ccf4b0356fc381caf9541621577fac5522e3cab +size 526383 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 579460f407..3e07eb5c49 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ce5fd07bdb04e243f591ecc4210765d2ca66dc7f399cd795ca4ec9b3139568c -size 609039 +oid sha256:f57e3d10ed206b5483a0986da3a0cac9eb7d6747e783c08b35bf761ea5613026 +size 609137 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c380438456..f94b99be87 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:af187b53d0e2a1334aef25a47420cdc458cd343a348f0753fe29187e15859272 -size 587773 +oid sha256:0f843cfb87ec09d406e520150f72ebe5fa934a6802537b42eee9dde26968a07f +size 588661 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index c5df3ad3c6..b22708ca3b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1233d8e244e3f1a08caa701bab9c07029ed1100ffc42a50831a7ef638f76ef83 -size 536641 +oid sha256:7f9cef8785101e83a7aba58b156a88c088e1301d637d698e401b67f66e12d82a +size 536739 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 46eb95f9df..7c16c1bfdf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:568ab718482b2a34069ddf105f04ab7023205d1a49b0f3b46b8c1e54ba62fd4d -size 517449 +oid sha256:087c352eaf707697b97d1eea5c803958773403f9de62346721125fa5697fc879 +size 517547 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..8f8424f3e0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2d14e0213d08f4d682241ec296ec4bb95138b003fb385b6cd604ba718524ac1 +size 752328 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7d08ee26fd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e360c6100ed5ee08712277f6a569394aef236a7b802349e8da18cb998edad523 +size 610387 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 7eb3ce1c0d..580975e5b0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b3a375ac93ace9d320baef376bb55d061c54f523893d0f997364d68c12581d1a -size 621902 +oid sha256:4d3cc024bf8f4c428d35352d7ef4d3a7652e89de4a6b93bc74cfa0428117c3e1 +size 622790 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 47459aa7e2..96719298ee 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:768786d79638533f05c3696d376424956a37ed1d17379ad1bd026ba015f3e44b -size 530477 +oid sha256:b10e3d7bcbd08f46d7e6add319913911ed86b7c026303b1afa8d450845a68815 +size 530577 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 2148ea4e54..baab71ff6f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:932568c46ba6be08fcae8b658c175c0207083b6d57815fae208aa450ea9822df -size 587167 +oid sha256:ceb51b1731f4ac84fc5e03e97062c7bfb7aa7f18d0b973aa021723292ae2ac83 +size 587267 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ed0a8eca1b..764c6e8f3b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:08cef4b1e567fec39edaf3c0edf8bb4608d2d44eede7edad0f6770991b57f844 -size 504823 +oid sha256:28f1f3f58c03ab60d5fcf20af2f0c5e3e7da1b6cc51190a2c14751afe51cbb13 +size 504921 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ab08d1c34b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6740150d66a755e39d25cefccdf053c6945d6d912879ecda59f60de60b73e5c8 +size 777118 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..503a37b760 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86c0bb98a20909c8ae260fb2aa5fe5e5a7865aa022036438921e7670350b7d29 +size 633402 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 373c14d339..25dd3189d9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:440e1ba66f292f2a99ea3f033e3caf9190dabe8c3ce0472ab3f29bacda439e48 -size 566621 +oid sha256:3d9d468140f846723e53cb19d2bdf2db421ad9d8c1da4ab8dc3acfa4c60631b8 +size 566769 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index b8f29a1f72..3ba60a303a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8086eeed04dfc08371d8db518015d224f37f7ae15d90aba0f1a0cfd429cc411a -size 463753 +oid sha256:1d105de25de05ae98ebf3287cbc9a8277bc8b6aab1b1495874e0dba6eae9633d +size 463853 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index c2e5a136a2..44b6f1e455 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5dafa1db9c8432221d0ec34b72f7f5ce6fffd88a774616233a4fd7361f8ff7cd -size 541459 +oid sha256:9cf5c3e4f5874efdf2e7d8c14fb3426dcb56a63a76c03a0ef69c5c800f32bc18 +size 541559 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 42f507784b..4b375bdf98 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:34310d08f51d1fa564d41ac455cbca41fdb6b6f64c569f6e3fa3a95dd524713c -size 440663 +oid sha256:57431c99a3e0657b62db8c47f2b032cd827405d99005d549e72c1dff77da4be1 +size 441551 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1c40ee5e75..83abef5da9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fe65cd7f97ed0f8ce75331b6edbe14d411fa47801e4b529f68cc8d91ca86b72e -size 609241 +oid sha256:35b33b53c4a7203b2baffcd3b807447df9112c449db74cf945582c2753644205 +size 609341 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e9760c1587..bd5b3acda3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6f7cc73667212540af96d7fae1aecf6a5beb5e735d64c457caa0c093bb94754f -size 586743 +oid sha256:fec782e4dfc5ba9d00ec67f9cd5492835709b86da9a5a94501276a1273a79f5c +size 586843 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index af1a359904..a6a3280445 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e9852d010c4b91eb1b1ee7b395fbd1db05d96d9dd2484ce6d1bc21e0b49fe51b -size 542567 +oid sha256:8ed44cf2abee7cf91d0055d139cade1abe2695637d98c897eb781c43a38dfdd5 +size 542665 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index c3cbe9de49..b8a5c1bdaa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cbe419cdeec2981b69f929d7fe3f7c1cda769fb7ec5d159a3f8a6d4c9e98eba0 -size 522289 +oid sha256:691321e2735db3043899c407c9fd763424eb948aaaacbba8b4b15ce074a12621 +size 523177 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5babc11f8b..6cf7c556f6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c53e640e7b0ddbc15a9933b0f409eb861dbc9cfbdf84bdd1ab05c8fd9511fd68 -size 601885 +oid sha256:8aef3af063d00897875c5d63ae0991aab7bdac7d8c9c205f88609d224193a5c0 +size 602033 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f53d8ff94c..c747de1467 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a8632812d1e2f2fb6e66057ca60a466a12de94db0671391f51e80bdc147c76fe -size 580621 +oid sha256:40abc7458b3797bc007af49f2d62a125e2a977d4242902f9c180924a3ac1077f +size 580719 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index d254f513db..07e342c057 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ace4ead6b16789b8be14099132885599dd26485b20f2d46e089af37f3d43f01 -size 532695 +oid sha256:7f549b1bd81793634db9fac72ca4f3038eeb3c6ef121d8833cee6b121a855685 +size 532793 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 827b5ee45e..168573c0d4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:939df0becec5dc7f7a4dd5c738faed674c6a4f1548d6b47b4793b13fd9c66d74 -size 514291 +oid sha256:c11e94764d2f4420e67f78511de1b07e738450734c6efe2cd12d6592c4d6271a +size 514389 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..d4acccae6a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b5957e69a20205a5b19082634eea12b89b96eaf11fc7dac77534dd3b143f29b +size 745076 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d3392b734b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79f6ccbd3eaff66f1c747f6fc5b606573ff6d9e81ed3f39d64dac18684b2296b +size 603235 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 9a3be8b6ae..cfbeb87645 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e0d841a71533e46c7b00112df7953b588c2019683bdd22ff870d7c885181b28 -size 614747 +oid sha256:0282e25175470f4d0c882786f3003fe278c5600df942519b716dca5051ec7e60 +size 615635 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9f930e8d7e..da57b05e39 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:41164019b3a7b1c6ae65c817bd94c06f9cd01a567d864d8ba5499b17eb3f0c84 -size 523325 +oid sha256:d6ce151140f7d2eb8ecfe3c8bda29b981ef6c4a93198674a0877b6d03e469a5a +size 523423 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index eb90259b5c..7d3dc3b4b5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c2678a2a8bb73f204c7828165544cc48952e98d60164cc0723e7b36b95674f5 -size 580113 +oid sha256:63447153afe15a85208b18a31ae2a1b1335f1cda3a56d55802709f72454e9764 +size 580211 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ddb24414b5..7d6a437648 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1cfaa5d0bbe42618f4ff18beed86a87f58a13629495a5267c9678e1ff0de4887 -size 497669 +oid sha256:9202cb4ce56f23dc7cfbe3d2cc5329940e0650c6bf1f91b5c5f37612e7d37a1e +size 497767 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ef201396a8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b3eb5b58fbd55e9f2dd36c58f2bac7409f0a692b9ed244a42e60ad5ae4c88d3 +size 770064 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..20cd182f42 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20b6bbc6249c5f3dd8d8e97ffeee36c9e34bab019630333b8ec072c80c51e2d7 +size 626248 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 9f21c18a08..1b045efaa2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:80bc2d77bec8487a82159cb434390205f23b0a13ebf2f114594bbf768f8e10bd -size 563465 +oid sha256:e01cb3ded31408c98d4c398e0c7b6a4cbc3e4ceb166cf26f11e25cab733a36e2 +size 563563 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index a3651fc5fa..05a8d298d7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b8f72d842a13151e898df1ea5d5f0f6e9651a99a4ce6f506d9b053ba2980660 -size 459807 +oid sha256:50bb49249d3ff70c06677b463e2f22186fef65d6a7935cbfb64b5e64442a3497 +size 460695 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 9eda89a8ab..a06368c4ee 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb151f5d0d1b209d61c78e75d0de238c61d449fd3dbd249922f28ed3381d7a64 -size 537513 +oid sha256:eefe8ef53d7fe9240fc150388774b5ff4de2cd577126bfbf3105e0c16b10feaa +size 537611 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index c5b7152541..a46fa115fc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3482f7194d1e9ed69ea278841a836880cbad04e9c1cc4af231e31c21028943b8 -size 437507 +oid sha256:9e997d2fd9eb0ff63b26c640a5fc219f4734570c08753a77535dff32e85196fb +size 437605 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b260ca9e4a..bdbd41990a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2deee5cdf94fa0d5c211e1608c9630f6fb2975b5b9b41126e6adc82f145e3dc9 -size 636706 +oid sha256:75199f499cb8dcf419d31df5d99b851633f2e38d2a30f3867fe904a56804e0ef +size 636804 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index bfa1285327..2671bbe678 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e5af269b9b7ff88dcff1a987f999a1e2b0ff49c16259fd7b0a781437329373a7 -size 614207 +oid sha256:9bff7ffd2aed11d3eaf9ef417aa4e579cec98cf082633303cec04af191882031 +size 614305 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9c6d7da08d..bc30d5b3cf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:08f9dadc2c21e9c0cae29007e534a4d4f79dbf2e683ded01fec4a2f8db3748de -size 567267 +oid sha256:5226bf58b74b1090f55b35416f8a80e46167267d231ed9a3c2a4d4593f8a1f55 +size 567367 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index e574c36a1a..b1a1719528 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa26dc7a832e86d86249ea1db571f864c768707ee24d209efd083cbd051c74ea -size 546891 +oid sha256:626c237d5b225f379cff20410c1c60ddb1b2c00a032b6614f992a49c40f799d4 +size 546989 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 53be888b52..0fa63b5a77 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b958685e9f94f393278f9d3c5475cce2c75d5666a91d2f865728df6715993462 -size 627770 +oid sha256:067cf152258ac83cdeebf10337bc6be67b2756050b0be22fa3fa042df72c004a +size 627870 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 95407e053d..7c67af11b3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c98ebd56d8bd94ed19954574ba2a50cf81e6d35ff19576aa16e7a4c5fb0e5236 -size 606455 +oid sha256:f0ef4ec9ece09b68f8d1ce0606969dac07a53d97f3359b7ce337ec855e6c1067 +size 606555 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 295cf70cf5..c899b948b0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4841c3095e200de63860c859a6c7e2751d155e553880ff915bc1ecb394bd54b9 -size 557345 +oid sha256:dd6453fe688b69372619a76abe8b919218fdbad5819f70248035df2fa2dd7895 +size 557445 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5348e2986e..9f23ac2256 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:11f3fd9fe2425d5eaff8bbcafe6d7ba4e558a13cbc83289b9ce9b0860e0f854b -size 538153 +oid sha256:bf0ca4ac4d389d5c4e8e2edac7deeff29c86c607e3f4f44fa91e5c5b2b64cbc6 +size 538251 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..dd4986b349 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9f68f8a777910ca422484840b34cd01c66a30f11bf81ce2fedd8e05e24cb063 +size 818618 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0a03db441d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c0a8719f003ff5728b3fb688908e99c0cd3274ffb25546a82ce6f7fc0c1b81b +size 676726 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 4222c7e937..b7a1b3d4ad 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f405f4d320ed98e086f6e49b61bb351458b8c53e369eaf77ca37b20771d9e2c -size 639104 +oid sha256:c163a3c7eef001f00457000b4b62a3002119fa11d0fbe43e5b6c7e7a262c2204 +size 639202 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2211fdfa27..06f750bf08 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:04f06aec86a47f092e6a12c7e7bf99e373b9d9f6ea2c1739efd9d2b4e1088486 -size 542253 +oid sha256:fe448776464928c5cfc18bbb164dbdf36371fab3758ca7871d14675c358e9838 +size 542351 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index fc9c5b8ec6..dd77a661f1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:18194c82f364335b090090ea2530b033e5bd161db64f36b21a8a8d3fb4d59d0e -size 611869 +oid sha256:37a6d1ba768d97783e5844e00ae8383231c9bd896f0f7fa0c5531acaeee45660 +size 611967 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ee88c7908c..170f5feea3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ae05420010c8cf24fc02c318695c1c8b12ace56c7a2b491f9e7fb3b2b2143f1 -size 516549 +oid sha256:298eca645d9eb2364ed0bd495bf344a972edd2e67d5397663abbfbed4d8a9ba1 +size 516647 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..5705132ba8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:690eb425c8cb31b07b9a5b1b896aaabeef32951ba6ba23dabf9f23793cbed949 +size 842962 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..728b95f195 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b09c3ea94b5e09cf0b7681c9441834faa9d92e3f5f9f686ab60a901daad5ad9 +size 701466 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index a6c86946b4..cb530e4655 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a202c72f2f25f55c3a940ff286782f36bf9ef01c5c054ff287e4eaef2e42781e -size 587721 +oid sha256:e5c3d287a67ec98e49b70d9893d049d4e6a28b5408fe389f56a2e907ea88f475 +size 587819 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index af08310172..e8a21068b9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7e505e2fe65ff5a95968034b09a2001818eebbff90771a291fe16cba9cc65479 -size 481943 +oid sha256:499e9e533a4afed59e1d580d0926d8b13374542e93e5038a39f444ce1db4211c +size 482831 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index d9d4a926d0..f0710b636a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed4288690b0979ce0f1c2084b759f0ab26e6b638a26656648ebd2861d0d2291e -size 560585 +oid sha256:90b49cb24bb94d88d87e274a45684a0567dbcd36d2a383a350158b8a07d01223 +size 560685 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8f4f800a84..3797bb6393 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:55ef6140092e2cb63c2cf4ece0d720acd59e11ffce301641c80932ef96d39dc6 -size 458063 +oid sha256:719f68abedd2b9ea278b57c658ee5b669bf32b39c1189deee67ad7c01a1e1862 +size 458161 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d5bcc28997..22217e8319 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e3c808592178a722ee518d81ebdb65fa88b645d6ce3f5e95a21a79c74f2bfc37 -size 629552 +oid sha256:8a792cd617d4c3b96e5f790eb7eada416dfad6303c40a644734b95792324034c +size 629652 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e50c48a410..76612cdff7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6419a8a57f885ad62cb99a9f9bc331cf8ebd0ed38483da619115f21328346d00 -size 607053 +oid sha256:447b812c8678c4345130265f6bc55f71f9f7832cbf6d40207fad8c6e6e382c67 +size 607153 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2f3294a195..bf1f464a38 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc0f69fd1f13ed03858e89545f8f98e4edf1640f57d8be2d97f4207e23cd0177 -size 564061 +oid sha256:6ce9c9e28a6fab183e0b0c41597f284f127e5fe78b4287f15fbfd04a72c43c6b +size 564159 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index ea0dfe40f4..a8ae2fc812 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3ab9f443fdfb6450d229f10482d37640fe9016dd81280d1e4c624133362782c1 -size 542895 +oid sha256:a8bdc3c187f615aa140838b94b3a916c8652d9c9d271d969333445bb02100dc9 +size 542993 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 79abb9ba02..e608561d17 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d9ccc9e157348555fe6dde3aa08b1d3c477d0ba9c1fcd3c5d4573c2572cc3b9a -size 620618 +oid sha256:fa27764ae3ad9e68d555c3e88707ae21f2f1bc95a450349e4e53082f8ac3e92b +size 620716 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 987f1894f1..fd0d7029f2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0c184c855cc4ba91c101abba45880f6379853efc574a4934da64991bb37b3af0 -size 599351 +oid sha256:84d02bda5bf9e8930e2ec8f1147fc510e26523d43d45bafcffd0d84b8ac54b09 +size 599451 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 83e7642840..0f2891e854 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b462617e1712f1f94155da8c006931b7bf32b179f4cea69df1a0667832577d0 -size 554189 +oid sha256:944b93b6515cd6e114219bd519d6cf21b52b483ea095530d16a0eea7a7138bd5 +size 554287 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 006805002e..f0b61735cf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5de79c599d06c0a236b3e6f3b319bb0e7d1de02afc906e26da1bef0263646af6 -size 534207 +oid sha256:c07600ae8d9361aec5e5937e36d8645183f50bd1b6ba37d9fc5e62716486fd7f +size 534305 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..d762091228 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b091c12027bc4e8492cd01481328972b6327b7badfe59cf7dac6c2c16ad57448 +size 811464 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a6620b181e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:706cbc984cc258a7d6739543aa951cb57a020abb2e17b958bda0d020317d7120 +size 669574 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 6c79b5f332..b3b48ff8d3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:00e51de472d292f2cefa490c9502433d89d69fea8e87f1d0dcaa89cffaba9ed2 -size 631950 +oid sha256:60392060922862d5cfb88eddbbd8d66d0ef0d7cec82e24543d9d0d0f4606ee1a +size 632048 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2b3dbb8449..b25fbccb6e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e2be1f0d4d24b8805017c01da9dc1bee2b698231729d11cd6e1cdd83871447d7 -size 535099 +oid sha256:38bc0628113e86ad2ea2b19ec6e09423fa6944eb4cf0db337b18df4b09039aa7 +size 535987 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index f029e422bf..ae79088c21 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4177438a56ff05ca67c88a58d0d9c31bf76da659af262ea5acc93ff6cad2ba7f -size 604715 +oid sha256:fae472db59dcae7e4bdd1fa104465c73073f6da7bbf234e92a931b0e518ad0ac +size 604813 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 58cd5c9432..b98472d1d9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f6184a180e21026a09b893c067bd29748c05f54c8da00a61db597dc882d7e242 -size 509395 +oid sha256:c9378286ab5f410d74bac4a15b2414a5eb7ec73c92ca73b000d7ece4677d7e3b +size 509493 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..dda25c34a8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:383e2948b34ff1a8d4e58d03f253742d45e99523fd3e1c63c9597f1158981cc3 +size 835810 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..f155c7737c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bacdcf9eccf9e55e527bdf56e2de1a372a538e8a2f162faa7c1450d09c0f0df +size 694314 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 79396e777d..3ad202e7bd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f4e2980f4ace4c0318e4222b61db03680415204d1a17c8f7568a6a05092212c -size 584563 +oid sha256:a49229d5c1b64c108b3c9b6c9ef435c61bf6455a3e59d4f789cdc28578b33a57 +size 584663 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8123540209..e3f765217c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:377dee2badb1ca80b226938d96589c057294c637be9fbdc58eee5739414a3561 -size 478785 +oid sha256:48ee479218b8c10fcef719c7960c6b8a53681eeb1afab7c07d20efc654c00d7d +size 478883 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 2ca3808dc1..e9c114dc60 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:93dab5bcc439ee79185e5797a4afe633f085386806df2116c5589c55f79529b3 -size 556393 +oid sha256:8fb2f8c9284af3241cd9c4a521e441f07b950ff5f630c36832e4259a7a68155d +size 556491 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index d13d26e0ae..4f431b9517 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b1a777c07b3f06dea7702dff812408f64f7d925bee8c6df55c71b8a9e2532cf -size 454857 +oid sha256:ae9e5a44223104c91f85f82953d0a0bcf7567b1ed1a0795e692cffd05a102dfc +size 454955 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c7665c92cd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628ddd50b0d552dddf732a08da389b611fabf1a14bf8be375e12fc1479b3e0a6 +size 663136 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a39136d1f1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7522409d974d6d8b7f323042416af4e28a376d96ed8ddae92d5ed6e9c93ff1e +size 580395 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..416396959a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23169eb85f53b6c1355ebf35a503f3a22359f07658c567049eb310e95bb05ffd +size 686494 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d8d87521bc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a17ba3a5fd1d1bc9cdb27a2c444f0f586205a55615f2bbc270723be0fdaa73eb +size 601485 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..15dbc5d8d5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f778ffeba0d55e8f3694cdcfb6145f7cf5d83046f2c22489b80f560cc68f689c +size 662444 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..572d298a34 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7646caa13b15a8cca4f71fb4a7b7e81af3f0d26f13959bc32050b2221c0053a7 +size 580739 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..5c46d83c8d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce7492a939b357963e102d478f0a316be2ccce3bddaebd097eadb9ff787544fb +size 685900 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..63cbf1762f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:435433d47a6697fd3de45bbd6133c8d17510eb429febfb200063598b829a8a13 +size 601483 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..d5426940ba --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51d22240d24f0dd746c77c767f3a490a5dc92c052405052c9b09613e7d9b36b0 +size 729768 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..69ec3950a4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a41d07f9189406b081cb64d17aea4f974d4ebc57ea3e02826992fb873e0cc4a +size 645844 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..700e300642 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a8134005ffe6b615338f019e88dfc0f6e02568be14b433ba98e3a765b989c1d +size 753718 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..6ea6f2afe1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a8378202a9c1290a7d35208582c7409ea65f8b364c9a7629f0367edcb2a061f +size 669646 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f8fa863c5c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a5ae08daad9dce0f9ea48558ce6530bce0572cf9d7fa3d15b816d650acf01b1 +size 748830 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..16be737b15 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aff2233b33141af97184208e5dd3221ac7027b3eecc8cac71c4ee840792f792 +size 660220 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..477e34a3a1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36912da94481f9fcb139ac509cd49f248ba0f538f4e0f4c66878b3a4f3d32be6 +size 771744 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..8724f51ee1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6f86c66f86e394a7b1f821167416205cec0256497808a18e1a9cf44ec917738 +size 680816 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..63328fd26a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b619e69eaa0960fb65f2c22da1361fbda8e2c1a6684668e1c822590b8165be1d +size 741676 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7f374c9571 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04efa05e0eef82995129d5c5c6b2ab9768f2e18951779eae836fb109dd242fdf +size 653066 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..9bfe269cf3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88c2654a63de87df5b6fa29818b0a20f8871e1e9bc754dfe23c82f22d09922f1 +size 764640 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..07435433ea --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e54bc6b3fc8250e13e46fd67edf44aba7b1e126912432fb5faa947510ff6ba51 +size 673662 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 31a8f38c17..abf86b3103 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:31553b6c5fd3d15829e89cbff0f9ea3b75b4bf6d38103d266b03c3965a983c6d -size 747974 +oid sha256:ed372dae3c32e4b8eb07c4532b5e0864ee0c0c2283e8040d3d13aca7a60810b1 +size 748072 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index b41cc25737..1081d36e6d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:345f3e8e2ac526017a777dff96ef3f6eac068004a98298cef2144032bb7fec13 -size 771678 +oid sha256:571b4e3e32ca24b43252346af29389cad423aff87f5dde6366378b2b9d9d9975 +size 771776 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 561f3d98c8..98b5fd379f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d192c491c2448fab36986f0f8f5f28a98cd7cc49a6c6564a534d1d0450f7434 -size 749054 +oid sha256:d4821364d0afb2096aeb4ee752cc22314a7c55832704f9556b936af36ed3e2a0 +size 749152 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 265c27f335..b6de35a23f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:686a4d68d07816945116ada9e03fa44107c211ecee99ddbacaf21a371d045ea2 -size 772806 +oid sha256:d44e14c72173737a77d89de635af25db596268a81bc70ea1e6559575f7b6284e +size 772906 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 895464a724..1cf182238a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:99586d02e448cb3b7c797f44ff9bdc970a38df5f441f2a4d9b902d31c00cd054 -size 711644 +oid sha256:00aeb8c4e9af8d28caa971d06a884cbd9392998e522b3ce44b79066b9d5e3451 +size 712532 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 75c9e79607..6b3df120b3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1202e071238c953646fa025a326306474f8fe5d2f4284568c1bfdbc55950fb24 -size 628264 +oid sha256:fe44a50c0f83d638c23ff1bce17098b3168aff19046118e1a3778f361dc4426b +size 629152 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 5cd33315fa..ea675e3f55 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c94861f5f5f9615fa3c47083de68e3ce8d799861d0aa461f4055b4447df44851 -size 738704 +oid sha256:2190ec9c1c296f66a5a8d25b19163460d04b351033918302c6440f38d8ce4011 +size 739592 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 1e21b59e55..8a40ad3102 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1329eaf9eeba62a92625a45a8005106d7a1fd0dffbec2c038f178ec464939ed4 -size 652756 +oid sha256:1521dd71f11fe7be170f4cacafba62e31b4b89df475628b62a91f43d7e7cabe6 +size 652856 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 089b1fe809..ffe22fa3b1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1133b7694dad203a084edf3cd0fda3cc6f3d96de3f6c58721f079b88fdd45bfd -size 740820 +oid sha256:8f931f44f5cc674833e6d94490448dfab1c8ddf10914271031a4726e8fabc937 +size 740920 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index af86051764..b4b9091416 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:167d88a6cf4b1b831d87eb2eaaa9193414ef5f8a0ef229ed64115e7b6c81b7ce -size 764524 +oid sha256:97b7f8f0fcdc4a0ef2abee8614fe0a921483b15ec60a801dc62bb9740314a8ec +size 764624 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 501fc4d541..5ae510db5c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c0a1a0f22f5678223bc3d77ed9bb2ebba398be16435a6097233994ff8b8610ba -size 741950 +oid sha256:61e44db0b0b049e05c2e2ea2542613eb2ad970a472e5848dcbc807eb27a26970 +size 742048 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 5feaad0b9f..81683f7056 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4f358bb3e9aa9e1782b3177c6485d8b89863e95d2d0e2af3375a228a920e722b -size 765654 +oid sha256:c938881d09825cac923db163607b31d5a4b778428db8be4fb22ece70dba7aa4b +size 765752 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 745539444e..e554402586 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0aacbf6b0cf77c90d2c035f35c155399c269e62d681b79978445cbee055c283d -size 705280 +oid sha256:dc76c2d530b5c266370606ee45d076a24e7e732338df9eb84ddd305021abb216 +size 705380 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 6ec0350ad2..251e7e708f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:10e440c97ab5b240715a10ee6d91d60f72012eb20c22b41066a67704c55084ee -size 621110 +oid sha256:565fe3f04e2b321fd8450bdf67dc974c7c0b61c73a785e7b60a03063f52ef7a6 +size 621208 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 89fe41a5e1..a27b882efb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a723513d5dadf36e6f1c75ea2ca8e95990918d8c34de82fb7e54e87fdc59509e -size 732388 +oid sha256:8b5f3edb95c3f9a06deba6b83bb245acced3d8b80c98f48b04eb7acf042d1215 +size 732488 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 4cd265f233..e7b4d29692 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:952e10de0e68773da072286f959c11c9bcaa0a51f2d9e8d9d3136e0bbff283ce -size 645604 +oid sha256:7d20fa0c0f48977ac1e4f311c38cf0f1030a9995218d7ca00d59b0a4434b858b +size 645702 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1fb3f9a6b5..075651b09d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3d866816518374c53d29cb1e2e7aa1a83fca3293e55222f7195114e5ecd550b5 -size 627300 +oid sha256:509f4a0917382e0e081d3d2f45c87d9230b73cf38e8cf349c19112e212dd8637 +size 627400 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index dd18d7018a..5bcff889d3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5948d8dedf060f39d8e6b292c2eb5da198787e28d9dff7b56579897fcfcac0d2 -size 583589 +oid sha256:db7cd5199755030cce7dd5a15e5a93022d81a8c40fa6450d82fa8a945ab87b54 +size 583687 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 533895938c..06938376d6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db0bb6fad7f933a6558b13ab193ad5006b2fa222c37e022a6574f187258aff29 -size 549031 +oid sha256:084c02665d9caf634daa4d394859c1d1e8a5e1c8427caab87de6d996ce397c93 +size 549131 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 949a673ac1..288b257642 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e565c6b2ae9fa8e80f083102b9c635a844a2fb39d78fe3862826ddf1b4b8c517 -size 510303 +oid sha256:24706c2da8da5a765ef26ef26746a566a568be18d16168070d60205badd8dd61 +size 511191 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e573cd19bb..9a0941292e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a2c1802c5ded988b434c265b7c6ab17c9ecc77fd63fc2cb745f71a823ee04c4b -size 616243 +oid sha256:0d6c4769202979c67fdb5f517b71bce2911d91f2c8614a70e90cf623141e649a +size 616341 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 842148b296..a8f3acf06e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:20c1958d6ff2005baa828c69d06867f7e3f68fdab740214d1077b44dd1edc87e -size 578057 +oid sha256:b8cb50638f2d97f227e3fd210490d1ba116f0dd8192ec05387a00548fd4aa04c +size 578155 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index df22757a5c..3959c1210b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cfadb37fbc7d2db9fd8ec4b82a9aeb5821cf9d3187425688efeb1dbeb450dd5e -size 534965 +oid sha256:229ab9baaf919868944ca576b6204bf71da572520deea57c0fad6338ee008194 +size 535065 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 35441a52d2..68ed835bed 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a2a5b585c6144c6a38fe3c0e26b38ffb2d79895be966d08319455c782442f7b -size 502157 +oid sha256:342690a619568a154ebe347ae20a016a140664ece61b22a4f50184f71ae5cf38 +size 502255 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b62f591cec --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbe158e791b5b9ab3169fdcdf204fd0ddf02077e296937774e5a2f56d065a9e9 +size 746706 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..295bb550e2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36432597c11dd1a72ba3b8b46e8a5d42eaa3bf41fdecb512f449ca4897b7da43 +size 660070 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 03bb4fd0f4..7b776521e7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb99b1210cacebb103e5d29527348ece5a4cc2eba894317d433c259f6295916c -size 612529 +oid sha256:73f5ada0e276670948a822c005b014d4387cc7c0ebe95fb34396e4afb0911514 +size 612627 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 93b42c00e6..eea8919b15 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f5660e34ccfaf1fe202ad657cba6f650f912441dcfe1e55b5421d63c4705f1b -size 521155 +oid sha256:f270fd8a878a51de30dd8eee691b7521e55ae23af6f2518ca710ebac57099d4a +size 521255 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 77d54440e8..f952fedf2b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bbd8f1137a9619976c2b59435d7f01bbbafa1cb691991eb552a758a41fa93248 -size 577353 +oid sha256:c64f876c0a2088443ed0a342bdedfb790b72d6a03e55a0e58504f3eb8e9b4790 +size 577451 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 095d11ac42..4fa273ad8e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a6fa0676f12d5962d61fc7a9f8292c8f047584e43afae197efe35e6b16c01a8d -size 494415 +oid sha256:9b5fd95c563e9204de7815402117e80cbb56102a5820877512975319a3701461 +size 494513 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..090642edf5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04769934c86b089425b0c8b9d9bd5685276291690b110f42811025745fa341a6 +size 768042 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..72f8766b44 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95f2f900eb45b107879dda464b6555e4a3b64146145b2573423bf0b149eb50d0 +size 681504 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index bf3596f16c..889ff1b8dc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd26bc0104cbc58a2af34de632f45a2a650cdf370d08eda130ed85261c6f4161 -size 550195 +oid sha256:e1bf13da3c125c885a15192328c910a8ebe665be81a1ba385f58d0a7ee76685b +size 550295 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 09c76f7ef8..660ece1894 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4336c617272aeb7dbec4d48ff7a022ccf94a3afc2e64ce7fd6d013904b17b5b2 -size 447969 +oid sha256:32b42ad0091264086c5ffc7ecf4b25da67953315287f79e1cd05146e94e104c9 +size 448067 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 56de5bda11..d192b57a91 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d39b9053c94ea6c3c6a693fa5c51d526178dbc09b7f45881504fb97156d5c91 -size 525873 +oid sha256:f0bf3440e87072ab79d6f4cbc6d91f55d9d10b9d3e6f7cdda164b6142a721c1e +size 525971 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1a260240b2..036bfb55b1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:efbf3bb5bb645ce13240ee287c1cf71901c5408ce09e2ce69848e92bf3afd504 -size 425965 +oid sha256:c5e3eda855b764299abb8951ff17cbac95214dbe5efd9c0491bc8f1a7022fff8 +size 426063 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0ad1d56330..7a4fb3a469 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4d1a34be9c89c0cbafb9cde5311f8c83660f0677c8fecb45962dd41683e3c307 -size 620936 +oid sha256:400fe42b5442ca002a31c91659fa836b1055d7893d4e9d43d231c743a8aed692 +size 621036 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6b9e475fa4..f172f038a2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6309388bcae327da8183ebf3006985902b1e46f0a5ebe6dfc9e5aeca1a043a3b -size 576435 +oid sha256:0674a78245e738cafdf30d3b9a7e152f7a6c72788efd519c6aa4b20d833979e4 +size 576533 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index a3c1ced987..3a4fa18624 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cdf611435e69ed93bc4f691b1615db6f4797e5e426d496e9cea6a94ba30e4c3a -size 545085 +oid sha256:0539d1870488382e85f80a72f7be65302493c4dc93bbad2cb7c5a7736695a6d9 +size 545183 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 825cbc0c08..0ff51123e7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6230acdaa050eba46553dff798946dbd99462e0d5db406da3ac00a777f8d71ef -size 507145 +oid sha256:ce87771aabb8a2720b1c56caa2f76c8b5a68566e196ec9dbe441cae3c2c4c72e +size 507245 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b9fec0b977..6916bb8861 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:22944ec1c1d7b9f19f856fee0ac2651418ff467f7c579b351b0916e368ede375 -size 609089 +oid sha256:f3fdbb2df798d81f5bb581a48beef926de95c6df13cca682b030c647ba37818e +size 609189 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7ce53be8c2..f2a99575e3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a2240d6813d34baf94d196df5ca5892ff387eea191e556ee4c1c29b1fb44f37b -size 570903 +oid sha256:9d5bb2c516a5c39e678712965df206f924b9e04bd520a23e1531aac6550cc2f7 +size 571003 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 58c6de12d1..1a152f5fce 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:da3aa175a7c185936b895cd5669b0275da64f59aeb95dd62b1b21b01b72d85fa -size 530969 +oid sha256:cd65aaa999983ab9e883e1e13e67cfc290a903557895cd35cabb5b58ef1cd6fb +size 531069 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2997a81921..31a9e391cd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a9179eaf451933b97681e431cb62cbd15d854f950001cc9bc50f7f14dcb8e09 -size 498951 +oid sha256:3104f5e41203ca2f019519afe569a817570963e1786b75f5aca6f72613fa0e6b +size 499049 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..715e87e411 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:140e5ac9861baae6bd12371c9b9f89f051eecad7d6dfaab091e5c2b4953db1c4 +size 739454 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b1574a15b3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea32af13d42023d56195c79d0d007e0eae46b4de76580477edf47276ce41c6be +size 652916 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index a6168fc030..e8ec23abbe 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b32724144f14753c201452fa8121f7a11710566e7eb06182152cff4ec5ab4864 -size 605375 +oid sha256:b0aab481c7ec40266cc03966193e799f9157b922d4f0024ae4068c700175f306 +size 605475 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4aba5f2a71..9d0d62de13 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1fc94be9dd4a96d0b5e35b708410befa325a81626de2fd3137eb0baa75301f3c -size 514003 +oid sha256:faedc3b1ec566495b927eb8daacbdd400da5315070a6c1588e76ca32a4d3608e +size 514101 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index e4883520ba..b89834a798 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d989e05b9a7c040bf824a627c361ac83f2841feeca7e8245f61296dba097c9a2 -size 570101 +oid sha256:e857757edee657d9d3acc7ec9168053efdd2e2fc1815cf324d30929d8b5fb35b +size 570199 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 88d90605fb..0994bc8725 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7da2b6156b1ce44aec6a6fbc65a30e1016d7dba5cd144da2e3d77ef2d0201267 -size 487261 +oid sha256:ce08680b2b688a4f482228051988e53db1a47f6cb63de82c9085564785ceb236 +size 487361 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c61d2c26d9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de94591bcce5e192ab0c7406a3a4db8b1ed50c2d512a79a9affe161ccdb3458f +size 761038 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..c9590bc75e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:860482610d3dd83acfcba719d945c4c3ef45f8a5456f264d5f524d7d922028d3 +size 674350 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 7a511f68fb..b79055d145 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cd1faf182b0c42852ce2b3820c333583acab585b20e69144ce085d599f335a01 -size 547039 +oid sha256:9e44d9b4a9e389335ee13d2a02dd562971dd914f46b1fcdabd0901afd4258d61 +size 547137 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9b6e620d51..d71392680e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:20c4c1c84de9c44548325c90768f3ae23332892b6ede9bc66dfd5ec1fc7c5053 -size 444763 +oid sha256:eb5d8e791c6fb0845b3fc274d47f60b5d4854605d93ee1bea4442e68ae410ad9 +size 444861 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 37abfbb769..e3eabe7715 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b4a9a968053268d2c8249cb2acfdd86b276eb7bb137d4cf1a29c9b0134ea2af -size 522715 +oid sha256:29856959f4a017b455366b17bdcbbabd8d5fc7c83438a6f261b3a7e2d1719366 +size 522813 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index f325d52b8f..6962d75f70 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d6ecc28d551778c74337cdcf6c477f68bb4fc8ab069ea7a9190e429814bd7c2f -size 422017 +oid sha256:355c87c007cf361743d64baa78eb33889f74b4cd595e9202d1746e7776e04e65 +size 422117 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ff98aa00c7..8b32b13a6f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f701da7366bfc97940e41649acc02cd46464947890f5b6bf28479a7698e485c -size 646032 +oid sha256:71b9cafe9687008f093747694cc37c63601729a841ea24e8500ca8a4c20f80ae +size 646130 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 31ee3a5c07..6532867e44 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2330d21461e5283e92db22edf5e767e2d2ae3181594d2650b4fc5f1561efd023 -size 590381 +oid sha256:7ea43a3881dd5ca006ff42080cfd0df3075f801183b991259445528aa2fd822d +size 590479 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9355b2a735..25bd6b1f1c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1de1934dff33286737c7d88a18d4c1243b45c73038dad3865a50eae9e686b0c7 -size 569737 +oid sha256:eff21c8c613e12cccfcdf671417ac76a235b7e6e099223636533a4839add0e17 +size 569835 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 763148d638..91251f5a3f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:99016f0b1e04806aeb09eb51618e07977e469225365c8b14e1fd08f55a96c874 -size 531007 +oid sha256:cbd252695e42ebe5ce4c45485ac6281bd1b419106958841e8f373f41332ad3da +size 531895 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 00d824e44c..7df13025f3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:533cf6bae81cc42027f8dee17ce7618019a60f0c4e79e9f2483116d37d5fb78a -size 634926 +oid sha256:76aaae8f6a0cb27968c737385fc1db691945300276c19d2ca6026880449a01e6 +size 635024 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index bde1add93b..2172ba05d4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:05f4839af60bacfe6fcdbf31e7b280e3885093ccbede81327e9830d45ee00673 -size 596147 +oid sha256:1c5a8d7883cb57573fe3defd063e19862c5e5a926bfac522d10cbff3bac58717 +size 596245 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index d09381630c..59d8a5a55b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3aed8f83ec8e369afdf7a6ad997c6e930d15d65d729b56978d0238140f46f84c -size 556459 +oid sha256:4611a159eea06cff307bf60bc9888d1d8854bd62e752df3ad75e1e490302ece0 +size 556559 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0790123983..10ff92922f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb4e371e4120d1f9d1483704d45ddd402e817aff014ea0d0e2de131247b994de -size 522861 +oid sha256:182c68762f2d2ed162ebcc3de428e6d8ab6a0c0038516a101ef3c48d7c7b56ef +size 522961 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..825dda7c78 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbcf3954d00842591b3578a042b0136e488b7e7c38843424c72d92d8a26a5a70 +size 817040 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..163d123c70 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:414a5be577b93b0cf9124cee16e9cbe64005d51b6481e696fb70f089a5b80c0d +size 729072 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 6a59617371..46262460a6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:23a6c4cc0b22436e35f9ca9c57b7f5809b9a14d00f647a66c8d1bf7d98c513fb -size 630472 +oid sha256:badc33ca13a4ac9937cd534c99756eebc4772e1fac5eb6a54183c2add288e823 +size 630570 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0bb6468ee9..9acc3fed85 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:988420d8a82acb3890761974f2c3df5a87b141887068cf6f53e8f62b792e749f -size 532685 +oid sha256:e896fc9c086cb8244ef1333001b384d209dbf0172bd21b45b8788944d4ba5c33 +size 532833 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 8c3181a8fa..12bba45c4a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f9697db140c21a31e9e87303902e50ab8b5c17fed74f794fecbe7746d4f4481 -size 600425 +oid sha256:dc46d32a1e2d9774d63f0dd09ffc47287969e6ffbbe84198a4b92c8a5c14bbb8 +size 601313 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f41ec99604..d0c1d7f411 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c8427dcbb44f33c648d5cffa0b2b6e96eb41d6951d8a1f7e967d955f45282c01 -size 506831 +oid sha256:71a008104ffe1643df78dc82e3585665d26a25c7a054b111cf27dca8f38c103c +size 506931 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6478c6d0af --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:149953580087fc00281fa7f545dc715c31553f0635de7abf149fbc63f48d17c1 +size 841336 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..84239cf83a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:add2ea98e2979e387a5e2011d6e6ea30307759f476d72b440b1bfe4ddee8921a +size 750408 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 0f586404aa..bc59e19b62 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b0d5c2310343de3de493c8d3508f9909b9c217f3384d19c4f6e9d5252f69d36d -size 571295 +oid sha256:13382e59bd15ea38e7300dd9c114404935e141fd008de8f5ee4afde238b4d71e +size 571393 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index e2ae45ca9d..d727799a04 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:084219f52ba669e9d5562d4ef4c8b029f900f29aeefcf8133c23061a831d4240 -size 466947 +oid sha256:5b52b039f2fbe40b39640e7fec4129ca28521e87defb568a506427811c9eed62 +size 467045 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index a21f171b94..c97cc0b5ce 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68d4e2940c837136f2e4c75c79ec627777dbb3c45b4f1be8e75765a696d5a5bd -size 544899 +oid sha256:1ed16df5dbd2bf900f841ffc56c7749a8d9cbbce37a1582edfcf67f13772f9b2 +size 544999 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 73e81833ba..3a6ad07fbb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:373e741eb1554bcb663356925aa8a486dc5f75340508154450cf0706d279ee06 -size 443363 +oid sha256:fe22331c778159ad28ecd641e608b62241733593cdf3824324196eca53d97549 +size 443463 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c84962a028..e0f936244a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e1756cd8863af475d4af4efd6895a947216968155f76117ece67e90857cc825a -size 638878 +oid sha256:3d97e3c593c65bdc70eeaeaaf4c3c6ed0a4f392bf5c691953465e84ba71a50e2 +size 638978 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4b146c99d5..29613723ca 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e031b4701f628bdb25a1ffcaf0594fbc35661f7f0909489bca60ab988a5a94e9 -size 583227 +oid sha256:d753f115a08b3f8d5f73ed28ef2ac4e143d88b0dc4024383688e24e97507de7e +size 583327 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7e52d18fcc..b469ff753b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c2a03ceaba17e9a3d33b28b966de661dd5b747fc16d896dd268c8e98dd334b78 -size 565789 +oid sha256:11acda7a472b090a8f4d9548c80fe153544b2a809e4628ece31580bca5fc6720 +size 565889 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2aa95bf35e..a3a8a51058 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7375f38fa8fcd498c7645fedfb0f9b8174c6dfda8d61b2fd0130a44141a7a41d -size 527851 +oid sha256:05c37600abffd186cf7b1927b46183e389da402218c4e946896f386f5f981461 +size 527949 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6f9b310693..23060a0269 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f3a846a158ae3aea2e0c7de3888e2c857b15564ead687340975ddd926b2d64b -size 627772 +oid sha256:b07e78f8822744c6662f9fcb36d42c8276dfd095df0d83ecceb8cdd5ea32b32a +size 627872 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 67ab1db057..4ffc377109 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:407e28eafdb23bb6484a4d4039184a16e9269dd44b4a8a28027ded50e75d5f21 -size 588993 +oid sha256:e2e5d3f5132fbbe6bf5d645a35b2314b31270dcd5b7d48cca23f411d04e495a0 +size 589093 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 66a583e920..8b4d0418e0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6d172005d547d4cbd00fe0383ab2d121eed685a6a61cc29e64eb2bea89b94e73 -size 553303 +oid sha256:8dd47511789ff49cf05c07967b6a6c7caceb31930a673b23d428bfb23572914f +size 553401 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 88961afdc5..b8d5477996 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:656527e8a73dd2d0bd2e9c572ef2d9809d2bc17a6329959081d18eef7649a232 -size 518865 +oid sha256:a4b155f2ec987c449e64117685597693fa21e89b7570decda6c9cfc2f53388a5 +size 518965 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b12facddbc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:addf7ad385fa7bf58963290f6d1ad258c83abfc3d1f5a8d588ee280dbfff6603 +size 810676 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..c4537840cc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42fab8c8ef5d19d762c02ebfcd5d1ca17afaeedab16125ec03136d2fbd0e4540 +size 721918 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 69d2fe27d9..385152f1f7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:741ac29cf9a813b6d68d9bc5476d2577d477c5af30eeb03392b0adbef108b3f7 -size 623318 +oid sha256:3a3659ea34b1b09a2157048684a42f3390a1febc9922f0a6a1ce91cc5912fe9d +size 623418 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0df554c585..88f6eab164 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4d4237e742bdef935ecda9d354fa1f2808301ebf4106e4abef72c3d171b01f42 -size 525581 +oid sha256:13115c47fe9cd06f86348e99a54ed0b6c2d1c892923ae22b7d0936f9b163ffea +size 526469 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index e592d51ec8..6f2f49a028 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0b25753c1153b09df8041ce8e5737bb4dc9537102a03ee5c840ebc063a00ee8d -size 593271 +oid sha256:dc1b4ee9ff4ca06f48743e16ba80c44a53bf9393648ef391af0954f2680859c9 +size 593371 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d9c2807696..e3c9204d29 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89706fd419a1d8a5a309ba7beca67a4bd86435f2167e6e39e35176f6a1fed6ae -size 499727 +oid sha256:7635b450e9a81c55ca290ff989ae7031c827d7262ed60c8d8319fc7b2854a8bc +size 499827 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..9ffc4c083d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c58544d9430922d48d7f437ef1e738f9c6ca5847c9ef5de7ebe94d1b0252c5b +size 834184 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a8a2f4a2e8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4722cba06b4158121a77acd8bc6f78a534d0a7ece91e7f8febf657a3790f852 +size 743254 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 045d473bff..e0b2696f0d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7aa829932ffc72823334e915b53edcde37af03d0b5a098153344101654be6d4f -size 568137 +oid sha256:87320220a22e35032e726c8a7b2526ea3a3aabc24c2501f6afcdb1cd135d2e62 +size 568237 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5eef900282..4e23fdc096 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb45b49616dea943b115f6bae114603c26d991a6c9b6bd703290aed4b860daa7 -size 462951 +oid sha256:236d62ab5aba1b62ba9a9045148c09bc4b656d8136dcc032f3604d22a39f3d41 +size 463049 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index de33544d58..26b7ef7bce 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed97d42a0fc73b8d3219ed20adcae5b7518f130ee5f1f811e143082ac6815978 -size 540953 +oid sha256:35a031624549977f39a7699ab927e2bdb351dd0b32e886d9942e635c0144d2e5 +size 541051 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index d58436b68f..40823f05ad 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a50eeed5961e3b2f4715741a9ebac6dedeefc6c147ff5974a2f4dc8d106b28bd -size 439417 +oid sha256:6e890cc7dda9cfadcfc347e84e1056c988bcda524b54fe238b6778ed4f7eec59 +size 439515 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6627277c11 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6cb7da6ef3087279700051e08d2f8cf7778f06dc6856f29f3f87e5f4b2fc6e6 +size 636990 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..cfa426a764 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ced12ef0f839f9d29503323f892c57160057aac6e3ec4771ea11955f3fe1c87 +size 547933 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..38ad216b78 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5254648944a1347b6b264f1114db89f2b9f7f95074094f2f5a13db6c56d01585 +size 638320 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..636d05915b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70c0d4f5d2dee3e8b3f503300d614a966fe660f62463da00caa52c93e668989b +size 549659 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..2c00af5b8d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c7c4688d9006f9b46fcda5515f3b3b8f1a9e068201d0b9ebced425ed3e27662 +size 704904 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0c3336097b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f19600111bc65730c5d8da8d195aa7e769cade2f4f792e9e1aba05af3df9368f +size 613629 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..14e57be827 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dd707c30298b451eeb5d9096d13e17bd58069b90c1ccc0d0e0c339201d7896d +size 725248 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5ebff8c502 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0d0de56f3cd725522940cc97fd26edb21bc8b27366caf07bb64a3e4cd7c800 +size 636638 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b4f6f17042 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7faf6790a8e64af2847ed9913a7e63fad8ae70e4cf01175cca555c20cbe009c6 +size 711732 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..156757d429 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ceb6586af87964f68c4ca512c5973d09c99f15eb2b74d6c5af22dd07c2d815c +size 623120 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 55be8847ac..8cf14e15f3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15b3db9fed61db0a41fe2c2a9914fc159d008cafe49dd10ec87fea96c94048d2 -size 725868 +oid sha256:878141f6f46b4476d39218016361c53b9cef96ae73ece6dc654ceb16ef5f3ff9 +size 725968 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8a2cd884c1..250d496dc0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b8a1a6375abcd046ac496668d4d1b5bf7b97ff2d522ed56c4d0331c404bf0e17 -size 686350 +oid sha256:c1fd77977868a9c3f056179a2d20165947acbcf195d2d3b68cba89b9141fd057 +size 686450 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b0c4c314ef..924ba0aa67 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:75243bf3b192c42c361e1bba1f60a40f2e0eed486e8b4fdbeab497d9ba30493a -size 708300 +oid sha256:74a6261ea7b0fc44397dde95654e306c76393d044a7f9479e120b61627d2bc49 +size 708398 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 253ac2b3ea..96903fffb2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d53e75113bee3452610374782f6029e5804a41c62c36ffd81d9637b4dc7f63a5 -size 676872 +oid sha256:d5a4e154d30ea985a4f91adca1f39a5a8d42705bebf60918f2dbd10f87ab4672 +size 676972 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c562e9e568 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c7dd8b5017e313cc22e364ccfb826c42eb34af89caa07150bb6aec934034de8 +size 726678 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..9bbf583bc6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:125df8012a2b41913140c17a61de2d082eb98928983e015a654ded6b8e068fba +size 640534 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index b8bef43017..2e5d14fd36 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0a787aad77d8150313fcf81855bd15c8208413984bb455a8b1f92622a96ad2f -size 708780 +oid sha256:a9d2cc358e6fa59c1f9b74cb0cd826beced3441ee44f3b5c0f6668cf6b99120b +size 708878 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 280d991365..7871a4a30a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7dd63ab25b27a4adfc7659286d7ae956f2362b12dccefc67dd7e9a0651970bcf -size 618096 +oid sha256:2a9c9bae081dfd7fa24e56173df07af5ae24a66bf55f9e5a083954910fbcd06b +size 618196 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index bfac080835..50e30bc245 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8fe4ac5bcc544516f4058c5869f638369151b2209da8348f63ee754165af84bf -size 670346 +oid sha256:1f39b253e6f56718a2604bcc7de711aced163caf6f39d0fce74002dee6b29bd5 +size 670446 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d2fa56c32c..abb12737f7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6ca427252cb0482967c4aee60a47844e9e53456ee70d4123fdb095c11f1158a3 -size 587261 +oid sha256:1d684a76b80ed5201dab1bcb50be75f9b1b88c9ba9b2cec81488cca0b0ae0d55 +size 587359 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6991c04ec0..31ed012065 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e950dab28d66a7cbbb55db2f87aafeb403a1e1419db09fddc27f5fbeb952744b -size 712352 +oid sha256:8852ffc453bb7f4380636a1e658d36e2ecc05ea7d02b69b8b3974497c906ad43 +size 712450 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index bf712f4724..b9e3343a6d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27ccfd8319184bf4bf6c9752a133b162d72e6fdfedf2b0d054fcd6b78cc9a545 -size 672834 +oid sha256:53d2ccb10afc72ac80afafe95bf209abe17ec1630bbc3315bc8b664f33cce120 +size 672932 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 02f9b6486b..f6d1ca4ce1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:649d46d6924bb925f4d41ca9a9ed14ebf7611c476277ec82ed02958964de8463 -size 694782 +oid sha256:14b29f228a61525e8248828a0bfa3f2ea96c078a8ac0b5eeaa40614b8328df29 +size 694882 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2d9a5d0e98..a8a26173cb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b62fb1fcf3f7e78f517c6aed9d87e4dc5d134ca8ae6c081037b408956191774 -size 662566 +oid sha256:be49eaaa18c257ca9abc13a5eedaf7bfac86e9accfab4bde95ff979defe1c3e7 +size 663454 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..03e9670e4c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76259ca9d604dafc8b9dce2e274759676858b676270e6d3363ccaf47a846d6c1 +size 713160 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b8eaee4724 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6996dd295ee250d5f58be826a502328312e718903914c6c77acf9183214d2b8 +size 627806 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index fa674c4fea..fa303de6cb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d65bb210506f5139718792423a4f23940eee2022d03a8df10eb384389adb1155 -size 695262 +oid sha256:5f3d61abda81d43413179d107e6182114ae1cd066acb951b9d0f41258eebf98c +size 695360 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f03b1c0a99..97f89c70d9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f8ad3b1f3628bdf16643d8f6dc9d059ca57cd0c03bfb02eb0a2a85a4cbd0155 -size 604579 +oid sha256:b3a1b35ea02d5de982acbcc3a269e91f3be327853468e5345bac45002b86ff2c +size 604677 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index ada866f604..74a93d2343 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fa28fad18c759aab3ba0224dd4dae9eafda91a547371392576b138d887e41f6c -size 656138 +oid sha256:53a2b2ee5b2c02c3d644a5495c6df805629c4a296fcf4e9a2548fdad314e203f +size 656238 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b82926302d..83760b14d8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32669b24d5c75d55051f5e7212e14f11551d1b3254ecc42b1339bd0c9a4d6067 -size 573743 +oid sha256:7c51cef57c411b2715a27e6355460f3f30a5126b443dabac2672155fa0ca5ba6 +size 573841 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 18418bb52a..deac385af2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5d9a3ec1e53367ad8dfc7bec1b2dfd787834a10ddb633ef842374b91b35c82e9 -size 745488 +oid sha256:f721227426e14d783c8816bcc57cd8016a2a3ee0ccc299482097c0ff8a57bdff +size 745586 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 921af17dad..10950806f2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a145b6d5e2538a3a015b2fe01c386d9cdd49ce34de5a2659dd23890101b48891 -size 705872 +oid sha256:fe8de924affc8419325af9246cd22af932bb8bf2dded8eb98fa7690e1be2f5a2 +size 705970 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0551a05196..e5eafcd20d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d5e062e5009b30eeb1d580b163ade13ad425d9afaa0f20ef8e203cd2bc038f4 -size 727920 +oid sha256:e5eeb270aae1f8ef56e4ce7cbcdef8621d41978e27e9de7b669c224a3be44ad4 +size 728018 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 111321344a..fe2cc1c788 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fa5e23241b8548110e61a39cbcf9b424cedb72ba49200e220fc6cc79110bc1c5 -size 694814 +oid sha256:7ab3845bfbb6be2380aaee2b7a689b08a9bf79bcbdd982a6f0f3660eda7a1637 +size 695702 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..8495814f63 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e91d777ac6df920af68961573577c7254ab39f795dba76b5649586e900ace23 +size 792374 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..3d9602915a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dcf172edf51a51f228734096a4b1e16124db276f38e83fe712a6a88b25f9136 +size 704010 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 717fe908c4..205b5880d1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ca61ba177ff43b04af72ab21846f995f2c12d37802fb88897b377f0484d137f -size 726770 +oid sha256:32bcb765813e78a96e9a1f4a43bdb1733ac3570dc7675f313c7c2ff852074a9c +size 726870 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 75e5b17191..5ad437edd1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae897df2236dc0213f85993e209f18f1dd437fc54a4fa862f1bea374e4782253 -size 625530 +oid sha256:29092a643d6ab0cd4d582e5f79a8a3af52d19d6f3e341b3dcf6764e456d372a9 +size 625630 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 84a529d08f..c20897296e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef40ddcf62f8ddf3f48f2adc1dcae7fc4f0c1bb320c9d7c959b57ba3655cc772 -size 694110 +oid sha256:36e9c102eb68575de6ad00614f179b5cb413defd499aa8f896c946c303cb06df +size 694208 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 675778f8fa..f06d747f01 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3ea4653abbce3720af2fbb3c444f91a8651ab59c85a74bc363a3d72260dcdbd2 -size 595533 +oid sha256:5eeb8e646d04b927d6e2d7d7a04c77539ab8d52242558b06fa991e9fedd74526 +size 596421 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c90a6b6d1c..97f7f25d05 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc3c253352d8d1809ac143a82f0ef42972fd479082c9350c0e413acc07ae4749 -size 732020 +oid sha256:38af745388b66fc21cb74b5e9678378f189d5d75d934414d86ff4438728acab5 +size 732118 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 150b3b541e..cc8cb87d8e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79943b538c79b06707f341c604648e11cc1f2709db4e3f48ba2ae168fd27caed -size 692404 +oid sha256:ce9ed418a510be54c84ffee064f714fb919feddba8dd0d2598a7b35e14fe6c80 +size 692502 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8169598129..33ba257e81 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:313def07e797b61c3451a7477eab21de38abd769ae79542daf11fe23149c245c -size 714402 +oid sha256:4e7b96e5dc6b7044c5c359003794289cf9b2c00655e7d0954c5ba2912744f551 +size 714500 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4f1a033ef9..b3669858ff 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d35f0c875cb86c237fc3b87a7ba7aea546055573b9ac0879441aaedb4fd074d -size 681346 +oid sha256:41b8ce7cc2448bb03aa38c78a066151be31adc0545402325a37e7a7ed0d9bdf6 +size 681446 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..54caecb2c8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f0d06dee8f42e8a9ec8730314e49b7be18f35b123b6e373629f09f7042d0ce3 +size 778758 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..8071953237 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d27bfc23617efeb09546e3a4d0ceba7615d274e875aa6f6da9b7a6a12d033e5 +size 690492 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 77b9305914..f20780f40d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2481cc6ead2a7e2d583177fb3a2744885c45d56413a5f33243a90cf353b6896 -size 713254 +oid sha256:a35ec1b780140343a8c1a8410400c9c51dc3ef0d98bbba3e3d353f09ce53dc4e +size 713352 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0f86a2b67e..b4d1ca70bd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6c951f48f761045eaa7fd42ce52e969c9bd9f8b64ff4d343c93e291918a8ece8 -size 612061 +oid sha256:3eed7fd7d0a59461fd601fe7375df0c80c4098e65b8c8480ca3c3f723c02dbf2 +size 612161 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 2d1110b8f3..8748c52c89 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b4027c7abb61765558633b271e55bc9d00f7f116b47af5b1ba50b5cdfad68396 -size 680592 +oid sha256:d12c764cc5fe1989cdeafe4f0daaf7e751fb77c7ff5c30c08348c50505ae0b19 +size 680692 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 211b262e63..5ce93e7a1b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a4a9fa131a8501549ef7fbc314cf5156eed3f9487af27e0ad3bac105c818b951 -size 582805 +oid sha256:4de9687d31dcd4857780b2f69e84467450b8d64cc6f8eeae9dca783c7369281a +size 582903 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..334ddaed7c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19ae2065bf380434736156489fa1abf240d3f2f34205170fd5f779d14023e491 +size 602403 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..9754e577b9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:920ebd73eace811a7b0e48d1019b37fd70a4425842e7b945e375377e935b074b +size 520453 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..932a36dfa1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70e399674768c55fda8067c5a6a61da702df47dcf16a2a3d7abfed28cdf83ff5 +size 627440 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..6119126f29 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a734a173ab988bcc9c314b8138a429241cc39b7d4855ba910ee89c4a2d55ec93 +size 542825 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..1699773239 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:927fa0e9c7fde44deb94581ceb32f55c263351af8e1d5f3a00679e22a752f6c5 +size 601711 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5757d43bc9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f880f94765ca1c72fe69e515eae4bcf379faa6f53a80af47f5595f33a5bd611 +size 520895 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..e148c0c5ec --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21fcdac503736a2a28a6794f1e1214dcba4f28db0784fa684791f0cb62926e71 +size 626748 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..9aa6babe9d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c73b99145c5055776319b58355eccb3dc6c19ff8764fc74dbd68d18dfe89ba7 +size 543909 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..bc3b889dcd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a87f38217c50a42052b765ef60e5925640e18cf06314bfc1052f8cfb9b67f7ff +size 669826 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..dfbb10e5de --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c00c00be6a4fc3c55353e1eee93a584313f71218aa36ae6c7eca7caf514bd06 +size 585851 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..31c861909c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd13b8dc7348e19a21b30899b62bc2b2ab41fa6a57a54924aa78ac5a5e57d70b +size 693924 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..746cfad79a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17010bbbb16685fcd815b11def2cbb9dc517f1d576b407329a52c9897b04ec98 +size 611775 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..4c1a1f71ce --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a61793101f9d7987b126b31f1cbbb9daf27200d3f0c9cf1466bd5551fd18b9be +size 689530 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4a71b6542f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1d7407ed02bfd7fab8247f9066f0d70b0cfca7266efc263212282159bbca9b5 +size 601115 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6b72a970ea --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4fe517d144c6aaa4d2719f39b43d5426a5e03e53eafe95d4bdd7e646413421e +size 713382 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5f790a7844 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdbf6f7b6223d1c479435c87ab79ff59bdd75b85ab7d48ac41bbf9d22490b847 +size 623290 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..2b8c53d64e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:462d2485cad06664a193dcd322198f1a4d260cdb3dc265e542870743c73336f6 +size 682376 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..bb593b9b1d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a110f897f6254bf4829b7f43c2ddf8f05e8afa82a5be3a37309e520f6e555dd +size 593961 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6467d2e23c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88b8257b5f73a7c5e8a6d6163d61be099d204a10b48038a9f0b6dc8e309dc930 +size 706228 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0ba92f8e51 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea305a3a16894117cb60425ee37664f4a1da9841e3a9a14733c867f27230e237 +size 616137 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index ae7292b83f..0adc75575d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be9403ac30c3f7ce12095d173fe3c132802c7893b3b42129402e2795b813cb28 -size 688476 +oid sha256:288853525e4abdfd2d55054bccba784f6afd4b6c985a75b2c3a444741658d3e1 +size 688574 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 799b0f9e6e..e3aacd8a58 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6d493c9462f92f43488fdedd8ef63f29e799734c675c8ed0f5e3462d1957d125 -size 714548 +oid sha256:1ba90f682e0c6a79274abad10b3bb8c9d97f97f2bcb195425d1e3a37071f8312 +size 715436 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index a8613040d2..ddd5863c60 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd313deb95e3bc392b0ace7b0abac030a23e55a2694f1e1a65158895f320a007 -size 671400 +oid sha256:078ae35d844b12cceb36d47d81da37a3c1d9f2b85e8974eb6f3324195c4d1b6e +size 671500 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 01b470ad6f..d14ed7cd8b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dbcbd8d6ea76dac159f25d737c5e3d58e83331400d9b2c8cafc2128204f3a287 -size 697522 +oid sha256:2d7a7c7c142c9e52f4dcfae875bbe675c82f72464ad12756b232b5bd91b8a44f +size 697620 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 8e7d0969fa..c7f5ac4ae4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c45f15c5c251463cbe3830512fa54954bb9351f984d3649c543fcc669cc248ab -size 665762 +oid sha256:c9c1a1a6315ea6b0dbaf50966223c6f48253f3d581bfbdd6e0377bad64a0f3cb +size 665862 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 17bcdfb2aa..422718e28c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27016a551fb9be21b7c8600b2bf23a9920862bcb3f4435e06ecf49871213dc07 -size 574487 +oid sha256:d23a95464f59a0edf35c467dfcc8add9876717b4903d5be57f2631f95facb11c +size 574585 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 59b0778f93..6fb408297b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d4de700dc7ef485882761d97aa7c9f4661f56709d1266c5856d7afa4b612fc5b -size 692822 +oid sha256:eb792a26fb6255ced598ecb75da239639ba76a0265550e22c4baf2dfedd4827a +size 692920 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 3ab6d09bd8..f88aa43a27 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c8161511365d67e302fcf87ec3f56866ee105b683a891c0de1c740baeaba8708 -size 600559 +oid sha256:e2bf40f8bedf50abf48263b9455481c69eceab2efc0a929bb5901134d223adec +size 600657 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index a990478583..cfb25d003f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b0b28bfdc8f2c331f8ce26de00bf1ed048c090bd86bec184a31c2d0967aea4e5 -size 681322 +oid sha256:24d35c4308df15da21dbe6432460447f39855d49188a524dc8dd7fc89b6d8911 +size 681422 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 6107136eaa..863c369fc8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5baae4f185bc3f72f3ea0a4d2aa428056b78cd5bcbd4680531bf4c034b60cea4 -size 708184 +oid sha256:d7eec5a248916c526cbde4f4996a5948770b3be6c7ba35b85f80843dfd27e78e +size 708282 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index c3b8784545..3dc4af2e59 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9af76abbf46fa57a017eeb2520f1df51cf46b57ef242796062202010030bad9e -size 664296 +oid sha256:a35aca0679b9ba7c82df0878169422d5f094f2def24976d90eceabfbf9f5398e +size 664396 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index fe273a1826..c4f8bb0f98 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32958b7fccc859bdc9715d1354d514b127d4700f7584d2bafa0292e845660ea1 -size 690368 +oid sha256:371d1f1dff011ed35f1cdf6b2da1c6fcdfb7db58cb038c69b50003f9442248b4 +size 690468 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 1e962de197..22382f0bbf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e71a7de4141f0009726475254118e778839c0a725b12a241da4445843bd9b7db -size 658610 +oid sha256:a881490001bffce5ecf41373ac518b3386c61f201b9e18d107fa225a61f341da +size 658708 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index adc2733b94..0c95fe9d7c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae5c62cd013598830cb1c85a03f2ca7b501598a749c510f0a1e3583eae161425 -size 566545 +oid sha256:a65593a7910c128220c4cea97f366174ceb20104f4af887401a11cc790f679d8 +size 567433 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index dc4829e133..46a55384db 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:388cb63f4277940e5bdea30bf113d34ea9d0dab88700bc76f0808b41bac7c6d3 -size 685718 +oid sha256:23c54ffdd45da2ad6a7a9441088fe8547fd8b4977e5dcbd92abdcc4edc6ed753 +size 685816 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index ad31400b1b..927364dbe3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d043eed181ca6e27ed442be221d0076aa45ebd72d08575d74d59a62bdea137be -size 593455 +oid sha256:e29622baeafc99353d1e9ea25356c41e80161d9b83e2621b99bc83d50266c4d7 +size 593553 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8be9dbaaf0..fac5305542 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab03120fa7af112b1547c24db8db8476037e0df5343dcbeb0bd540aa1e1bf520 -size 607071 +oid sha256:4bd0c5b3824a881faaa1ab75245b45742ca1f25caf77cf236c4a831b507fce25 +size 607169 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d2d15425b3..d96eb8f66b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2856c08b8cad5f9e0aadbbe8bba3f80386b3bae2a88cab63d75ef74f7c7dc280 -size 583783 +oid sha256:b43fb3484c1c253b1b8c178fc9c9efeba789c97d098dd5824ebbf0196f023f16 +size 583883 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 50046bb4b4..77f0abf111 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ffb504ef498d476b88cb74676ec5c57772be6f5d1b937d6ba00427ec3fd8fd31 -size 537485 +oid sha256:751d5650fd90413e0144c5150bf8845e9bdb294e33aea199ddbec786011f9b36 +size 537585 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3b066064f9..58f603b56f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e1c114f9e162b0c97f3a9b6808a5deaa01f9d41c43608d6504db0cfd10dcd51 -size 515531 +oid sha256:360c2e5365d3a40fba98e1f4a2e153216e901247492c3cea66aa05ab8cf193c5 +size 516419 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9d221f61fb..7560361fd2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb19e00c5c01508d301738b4e72231d770fd28652b3ae521319e9695bc03d13f -size 599517 +oid sha256:8f0de606c571b1280b908cf7379b5abc8eb8596f47d92d62abe1aaffa8ce8891 +size 599615 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 399027e934..64b85e6626 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:73d3ce22b3485dc8f7e2e1f9c595e80dd0db42116ecc31ecf0604188ec4ca268 -size 577463 +oid sha256:0e14aa5d73dd290eee14f80d9244e30a0c1ce0a49edfcd760c15a60944cd0fda +size 577561 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index b0fb9c9946..1508b90df6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6affe707bc764558699c7bd9ff34c88a18f00b9754a1105dc2dd2764a54c7d3e -size 527415 +oid sha256:476435268d671f623a912097195cb676090de49cd275dc645ffd0513a946eb5b +size 527515 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 784a6f0c75..d21c048b60 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:51b74574fc7c25e09b63e8129153a209367c80cc0a9788663b7a6671cb2867b5 -size 508421 +oid sha256:42050c2bad90f4abb3ec858fb01a14decda1c12d12ef799ef126f20209e2990c +size 508519 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..fbb221684f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a17b50155e6e0038453e5f3834ab77095538701ba86057eab671cdcd6a4ddb5 +size 683854 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..2f923ae6fb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f55cd18a164cc10057a64bf9250f247b25caf84054f10dd6e8d1fb3aae5c077 +size 601063 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 96c61030d0..1a1324d9b2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9cb84b79b97ca6002853a272c04ec511a4eb7a824b3ccfb52df670fc0b24c540 -size 603401 +oid sha256:68d6eebc58f60bcbf6d44c42d8b0a2dba127507e38b60fafd578c86da4180a1f +size 603499 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3fc721d844..07336bcbc6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e74d1f80c34a1c51db5264d5f2120b572b733fb5708fda468a29f8193208404d -size 520561 +oid sha256:64ce760c7cccb9257c69431f57063c12d98f6fe637616dfd0968b4e7752f5299 +size 520661 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index fa1aae9660..86024c8bf6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:081242999c7cff4518716e394feae461edc4bfc6bd28d1f9d74c502ab3102330 -size 577647 +oid sha256:27c559596589ab111ecfd734b6622461b87a50db5d661d13045635cda4d108e1 +size 577745 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6b4ff6340e..b6e7542f2e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:05ee1e10a4bb49739de7bc497161f2a54a88f9ca6b78f32e4704467f78623eb5 -size 494363 +oid sha256:b3e0fdb1edf22883e579491d1ed766acf5bec3fda6c13b97361d8ea9aef97b2c +size 494463 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..8b10d7a731 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3deec992426bf56a294644d844da801ac28b458539523d9f31170fc86c800ff2 +size 710468 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..bd70fd8e08 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85cad215234b899bd1de4fb0a62e6e954040f28e35194ad79ae82160c5b02373 +size 624128 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index ff3b9db2a7..b659de3c23 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32ae2b6a748ff2030cfc08f2c79f6d908935708e2c4d079e46ed936f9f7cd737 -size 555965 +oid sha256:adab5e75398cda890a2cc68926df5b6424b2314b6bc35757e60f7857cd143f30 +size 556065 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index a7c1dada77..20fdfd9adc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:29a6c4e0676064152e42dd03bf6f10f69e1aadbda398930bdc2d765e43d43628 -size 453641 +oid sha256:446efbcca98547b04b733c0c18f633cacba6085d85e62656f80afd070e02caf1 +size 453739 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index f3e2b4e13f..095858b09c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e95c1c23a921815bb80af6a7bbbc33ecf4195c29775e188b78ce8205ba57ed10 -size 531839 +oid sha256:2174ab24d488c61ea2b4c94714f70ea2a0cab6f1254f667c4053cbe63930311a +size 531939 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 055d8109dd..7f62bfa281 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:864034602b8bdd6a447c5484443c2892dbb32b8210136a417bd9e0dd7fa91431 -size 431931 +oid sha256:127a11b7101ccaaf361aed1b1b075efa54f81306344d703a883f47f524c70f1c +size 432031 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ae6596443d..6de6c8251f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:36f27f30090ef910c4ce8d09be4788cca36d21a721a59a735da955042b53bf5a -size 599917 +oid sha256:9a0583435722117c928b960bc6b71ed1d04b5932a738600a39f4b74120c5c8df +size 600017 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e5e660189a..743a43729a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:193e14cd18a3fffee6b9e445dae6c5439e8a61cca3bc62365a6bfe5fd33b2769 -size 575841 +oid sha256:441fa67b57425ec22355c421976b8c406df898a1c113bbf22bfd33e397f79a2e +size 576729 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0aef41384a..f5a3f391de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e5acc0a028cd5bfab3fbfdf64d6ff5a601c146fe01f9977dfa05afdde0e92d8 -size 533539 +oid sha256:ec64f5508f87abade7f35b4d0a453c7bad7036aa9a348b029e6381f4b1d7fc00 +size 533637 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index a81c5d5440..cb3c91b7bf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:52b9e1a1a450af61bfc6750ed162a88d7d349d51e4f6c5b5fd022529a1602e0b -size 512323 +oid sha256:d94c1a8cc954c9c4447b39a72952209e8d09258aa906221d1828cce02187006a +size 512423 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 77302ad480..c60a0f9ae9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f1fe6bd3895fbbdba92163e6b7dc1540fb4255c7a1118c58dc93ca146f985507 -size 592363 +oid sha256:a15ee7f40f41e774816872e95ac326a124a97691181711f185b4cb7dac08b2df +size 592463 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f4073a708b..b637c2476c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:793fed42bd4727bc4ff54d93bfb2d4df1882907abe08111fdfc47dc610e84761 -size 570309 +oid sha256:65dddabc26cf92a3d70fdd35008fb35d0dfcb5c92bbc04172e73beefc3cf246b +size 570409 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2956fdf18b..bd4d7a6aee 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3400b404745c6461c251fb586deae0a0e72abc05cb60955b41e135a9de2866de -size 523419 +oid sha256:6cd11ab66f7a4a2486b6564174e11dc8d02f4eefbac7827fff6292dde061216a +size 524307 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 93a61ebc1e..4910797325 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3a1cdaec18fdcdb449764090641a1931d366d014d9b05c2e33a28246977859be -size 504425 +oid sha256:02df00c0685b178d6c466677c6842ecf2d3d139d45faac9d1c164417bdb03561 +size 505313 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f1d35c9f74 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23eae098e4fd4f94e95d7423194e44ac8060c0d0f9b228101af20e92a3a26417 +size 676602 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d4ba734edf --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57215235235c65a5ad5978e978bbe30c80a819055fec17b3c5ea4342f8f34c17 +size 593959 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 6491e0f39e..41c3a3f421 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3efc8b4b452fa8c2d5cf4a9a6b8798c2f53ee18524765e0623486b08f192eb1a -size 595457 +oid sha256:838f50a009746475af04592fb45440a101206845014387e65e97b67eae838168 +size 596345 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 33b0c00457..08b5860a4f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d795c8bf7149e3dfea7cc0027d6e6954be0b6ce88c7c66a61fac0ab96728e3e3 -size 513409 +oid sha256:329035e27792037ab6c48326b26d8ef7014915da4ecd3690906d4b21bb1c666b +size 513507 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index a99b47f3fb..be51e97651 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee5b059f05ec9408391fb67bd033a44fdd5e3b23b814d41012f637ce03cd7015 -size 570591 +oid sha256:32b46a2227b37606a36d043380a2d34eab0cc621316c9ee6f82eb07a003ea38d +size 570691 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f4f1dd7b27..6a92169536 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:37b8162f4bdda6853d4bbff50ed8b855afae750da7c74845f2d95c4a8ef28c1d -size 487211 +oid sha256:73b9b3fe4dbc2a9344a371aa12357d969d60513a5da046405f98e0479ae97ef0 +size 487309 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..52b654c0d8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fcc7ea4c9d445110df0ce8313ec4969cc012cc7c03c4cf58603d630c341eb5e +size 702624 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..f9104abfb4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4754819fbc780f120e1be23fe0223a1d32d6447f5308b15233ba14278e4e104c +size 616973 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 9f014810b5..929e9c91d5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b89c151c1a72488c8ca472ed8c3d385c0627d703c57180ff6501bc84fb3286e6 -size 552809 +oid sha256:22869a23fa475a21ef1b9c2e578ce6ee36c6d634e1b4396afe42a6c0995ff1bb +size 552907 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2a7c5b3a3d..1f1a69940b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:355cda60b03d7d27ef8a143f4d3cf7ab6411bf45a914e69e13af99317faec5d2 -size 449693 +oid sha256:032cbac5ccb91c3ce63b3829fa344687d1e9add5c554d8da6117a1a2e3a30f73 +size 449793 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 717005d940..b5b4646a2c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:409f6a6c0f3614ebdfa3cf4ca3360daa444da1dd9cac245542fbd298ead59459 -size 528633 +oid sha256:24b1d5781717fbf1238b3de1b1ce7067e694f03a565cd9815450bf95a0e3edd7 +size 528731 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index a0013046ac..00d9c75e6d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4e82f2e73b786396457e7b6f12ac3c742946569ff7a1f4372a9d0c218a30e998 -size 427935 +oid sha256:aa0fdccb2f0ff8293277e19972fd6ff39a704689ac6f45080ce0c114d3938671 +size 428083 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 93cc34b119..76e26da11c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ef005205b1b2bedf9effa81c9517af551224f6d9ac1f7c72f5e183276534ecb -size 626544 +oid sha256:5f7693701676a3b272d61c101f26287f9a01342c0c1b52e8357f02675b2f9dc7 +size 626642 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index fc1bba8c00..602f334583 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d2237382e1536a11f0012bac70beac1faa5aa03b334e4aa6167c6f9c89c968b -size 591119 +oid sha256:05e4d86d00b9c98c32cb5adab0742a27cafc07bb1b244ef58024aa9b03284886 +size 591217 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1fa0be213f..51dca70053 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5031ce22b894abb5aecf50558ba83018b1bd940fab004b9f21492ca72d96d212 -size 558191 +oid sha256:3bc8bde88a451704af65c2c90b9f42e264bb01c3ea4cd7ec3b6bc7838fe6699e +size 559079 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5a6c2d83c6..03431a9f85 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09c20c1f3ab20043c26b6bb1689a9c837f233042617f41ad6f7431ddd78e9711 -size 537025 +oid sha256:7386835047ee7f7bb60afbabb24bc6733cc36dacc71a49465dd1dd0dfd5035f8 +size 537123 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2b658695cf..cc3b1cc4a9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:986229e35b0e02b6fd820af4996fa7e3a92acc49f04fe82f4137e31cbe969e7f -size 618200 +oid sha256:8b573f2bafc60edd66e5403e8f060370392e609c5f1bdcc9bf7dd2ce00b148a8 +size 618298 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9d63d61548..ebbbfb0041 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:42a1a7b4c0474cef429d076d134030a0d63423cd67d70aab03f6bdce100e26e9 -size 595355 +oid sha256:b5ed98ef49036284fbdb47f399e243e67c7acf211a3112201c0536bad280690a +size 595455 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 398dd7a59a..72ed85689f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3677ba1aa1c7d20dbf26fbe9a3df58899cdaff52e54c96c4b48541fdc5fab37f -size 548121 +oid sha256:6a4582f7c6a5d994b22b3debe0ff2caf3acf970534ae5fb4111c9964cc75faa3 +size 548219 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index aed5ef048a..09e271afed 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:19bfc01c70944fca09a03b06899e040739e605e2c5eb25f2da30c3fa40f7de72 -size 528139 +oid sha256:03eeda56b95d15fe8fe6f1f9cde32e586d3c5d0955b5003ee5efdc5b38f67add +size 529027 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c4b1b7322a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d932dc97090dd6a52884f0d8ca19c8894bf653264fe5928e951f3a6aba615de9 +size 757740 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..2b45dc326b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a5c45ee2fcb9c633e949ca52481a37453bbd93278ed613afa06bde8f9afce0f +size 667452 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 551b4d9488..103a63a981 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9c14aed38a78d359539608e65897b1ab1c2f3e748eac24c0c15c985cfb9b220d -size 629976 +oid sha256:03b0e2f9d3d5455fd1ff0e3c414a32045af5bce3e8a1776d99dad8f6b6c6ebd5 +size 630076 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3201ebdb9b..610b77245c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c764d8acfba08ce81cb5ff4e289ee3fb9afb0b77cfbbe6a617f22a9e37fe35be -size 532337 +oid sha256:f538dabfe16b3344f3d349bfc3ea425265d307b108281212067dc668ce11b3f7 +size 532435 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index b548b0ea4c..23538e24cc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c0e53191afd8bc469ba01404d725706d68c6e71be7101c47681f317d1e4a6022 -size 598795 +oid sha256:4d58d16c3d491dff73fcaf824cd5ca6025d355273641a35ca55cc029ce7a1c66 +size 599683 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0916652e01..2122330e33 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b5e55fa30c8d4d27b0ca76c22eaf8d816a37d1cce5d57e623e5ddb8feed01e5 -size 506929 +oid sha256:606eec22f103536a48fe753d1c4e67553e8b4d0b57306697fd8602816f16ce69 +size 507027 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..78ae28005e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b83aa54f9a966c4972a3439666ceaac28bd47cbb065bca570d7f1cbf052b5e +size 783714 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..29e994bd45 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40b4967bc2ce95c580bf4429ccb34cfb28eee8a1c39bcdbd0f09b69db0cc0412 +size 692142 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 1e85c30573..f4863dce5c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b120beee9566113c8aa0df8f8aa2124a31592224b89ede315a15e773489614b1 -size 577855 +oid sha256:b039546f8132b93a67764776205f18ea295b364df623f4ecbbed45d4de1a1aeb +size 577953 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index bce0235fbe..b400dac41d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c5033a46a30e36fd20b65a68a49b0e17d0f5b80cea64ae40bf92e21ca636dd1d -size 471829 +oid sha256:8a93ee5acb86f9031acf39c19eeb7a5bd7034798b7de4008cb61ceb08dcffc6b +size 471927 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 9c3b4c93f8..373b6c5b1b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dfa282971c71b30a616d519c77a604be9825bb5f0069e0561a13cca1edee3357 -size 550867 +oid sha256:48ad9b228efa2340fbfe5ca9ab1bea07fa9fe0d5349ceadcc9ac80dda361ab5b +size 550965 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 065bebb256..02b1a1dffb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ca71d39a90d4aa8ee5251cdb8f1f5669cee5537f70cf1704580a84e6a0bf49eb -size 449281 +oid sha256:e8acea3db305a790ab0f0988544fbdf88d7b0411ee9aab24d3fbcc1f73876eb2 +size 449429 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b0adc5fa52..feaf9f4bf0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d9fbf31b7aaaaddc87be472646f9e52d76a63a8d498de8c13127c4001a32a72f -size 619390 +oid sha256:5aed3c10453927c578590786c9a9dd29753b0b1539a18a4df34adb424ad8273e +size 619488 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a30b593d9d..663fb78862 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d49f35794a8a5d9b5d1660e156b06e613f3e29972dbb2c08753a1bea7bcad41c -size 584755 +oid sha256:ed9588c29eeccda225e0fdd3d129d32bacb84cedaf5ecf64e461d2ac2f71bd2a +size 584853 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index f58023320e..276c886494 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8b9a348d98ee2ad13000b3aa385d01b372b817c5f163518095cefe0d4b22d9ba -size 555033 +oid sha256:8684b8a50055a20f23a325ca702a8749cab70dac2bf4df878e1dd37739a884de +size 555131 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index dc2dd17293..c58f7a5254 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:031d4d13de597c6cb8326850aad7dc9e25cba7969248bf86e5a32d2af87d562d -size 533817 +oid sha256:84da5470341d2b7467f9ab1ba7dcc06f1bb9fd955615800b5c03c85cf3f0ccdf +size 533917 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index db3391456f..4d2f509a9a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25a9c9bc992344d0ea689e1dec422f992dd752a463ac7f7fe027d86a9d07ca8d -size 611045 +oid sha256:6820f38036f2b8b2b740e4d76120ba2f14101126193172eaf08c89f28975619e +size 611145 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0928e5be2b..5e460a7921 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b0fe68fe9d562e9295da12cf0ec0fc860287ccac745c46a11f241f8e8acab68f -size 588203 +oid sha256:5d645fa46523abeee9a742e52dd0b498cc5feb24035202b6cfcfea9c2e4a0122 +size 588301 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 96083cd8e9..adcb6fb57f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:05e5d542ad9cfe8d39dd746499abd62960cfcbff65f8cc0b886e29ce64652b46 -size 544913 +oid sha256:ea6830012fc5b0ddcff3016cb01940d516986e54a83ec6ea7e11377848891ffb +size 545013 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index aba59aad80..0629b25cdc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:287b1d3d92c63a097fd42e70a85833a8650c48acd75a0409ca5c14964bc368f9 -size 524931 +oid sha256:df131565d5ac79b7845200f53055a207fb0a406442bf84137edfc932521a1abd +size 525031 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f60dbb4482 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:042af9b5119f338f895ab2edf7393d57e95cfcc456ae2a3c794070ce9cea3058 +size 750586 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d841614e21 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3af0a39c4562ab836f9dd7bca40849b3a504b03505dcb3bdb5aa06beaf018830 +size 660298 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 082da3c9c2..7e5c885cbd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b30b88007fcd56bfaac7aadab07738cf578693a25138051a0fd2989f896eea7e -size 622824 +oid sha256:b284e4402d443ace438d0457297c87a4560039f8e6f42bb5e6624b1d861d579a +size 622922 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f8e0588e99..d674317c2b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f55f0d3f0eda3c6f2a899d1dbf3f2785a4234674cbd5f98fccd9b0bb7b2055d -size 525183 +oid sha256:7e1d6d0b223e199ad3a3625e02c9fa4651c441652e4c03ceaabd8c01f28b959c +size 526071 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 5a0233f76a..6d7e0a552e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a8df1634594717daeb7438bf91f6f135c63bc9dace32fb78fd8f3396c7aa4b4 -size 591691 +oid sha256:7e90e4a2b54fe9f1d973ea5dbcbecd290a96b659aab0f0171d781be1a23a1afd +size 591789 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2120177edc..f2e29be2c9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1ca24892318356a6714ef4f9cb53c528ef4123da4dc56bb3a36dc82c038c90b -size 499775 +oid sha256:fca18c75440655094188d21280d9e2a2b409c5aa4a1db0a3f1a7cd6160c622ea +size 499873 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..7bcd56149a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19026adc651cfc126bf386eca71515da7c12557af5acb2e535053dc64cb5f474 +size 776560 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..03a795ad99 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aaa5e448ba3a838f12933df708bcca1d018db2b5796c184091e01b686b75cf2 +size 684990 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 6249242bc0..f1e243b8f4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a6613675df34a58335bfdf274ed3fc54a7fb45aca8729d8238bd3461da68301e -size 573907 +oid sha256:f04d15bfeca77841bf0bf6f6d72be0ed9df9890a974d8f46b589d514af381428 +size 574007 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 469a4c2ed4..a2bc536e75 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27c2a34c112c8c7cee948e15d556d80b1dc88c4e0b481f87f2074881534b922a -size 468671 +oid sha256:8c36ed14dc2e6fd6f726e5f1082647526665b649a2893947de5210a8d870f7a8 +size 468771 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index f2c7e741cf..54cb37b049 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b47286a66900ad11fbcfc97a322a92474a1d4ba028f9f30af9388cc7ef7119ff -size 546871 +oid sha256:1d550aa3c155413a1d8cc710b875d57ac8f5ee040a453e07e0981420f8ee9663 +size 547759 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9f56265b5f..578f3df2b1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:443bcf278f099fe8f07ab8ec07e687b6940d115c5858c84d09be8bcf9454ae4c -size 445335 +oid sha256:d7798ca2584191b5f9505427b7cb937ffb286f611781e7d7ebe312ad5b7c3fde +size 445433 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..9156bb08bf --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74c1c1939ed7a69de4227d25300c8a4971509a0267be56a0d219f75ca1ed146b +size 644488 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..950e5abe12 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c53be8c1784d3788d3230e64ab717d34097fa46caa6b2d84e1f70af3bc724c08 +size 561155 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f992a89489 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:028ed126f139c36a7a598424e8401436cb5fb5bd56f4d0455c250a35d3f3b085 +size 667846 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ac33846ce0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f945225c06f03af161fd5ce55f1889050ee4cd2823a225fe446118c471eaeb7 +size 582245 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..3e35fb913b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e975cda2157b57e983fdd1c83d58c6a2746b555d8c5df639945d506536035216 +size 642216 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..8f1efb17fc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beef00aeef1b06c2390292b3b51033579624ce38026a00b0da339b17817cb64e +size 561499 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..3685371fd3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:298af13a6ac0f5c14479cce1fe557c0e04ca722500492d4725ced07813fb96af +size 665674 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5343f53768 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0155063a11a1025661f2395cae48bd6bdc8c81b4d698bdc9ad973e5aa7934a3b +size 583031 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f197cb696b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67ac699a09160ecd8191b049a4928c2bfaf438ebaa2a5535252d6db7a82ba5c7 +size 711120 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5c78a3c177 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94549963bca439ba86382be555507d620ccbb27f5ad3c38a31ef1c1783e53a45 +size 628134 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..d8997b5733 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11e4947015cdbe789e6be73c7c23a24d07ba9ee214e62a60ce95db1c3563e6d2 +size 734282 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7963aeaa53 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d47d20c9ed55d9abdf76b207f81f0d1e591e09d1f3fc80fa0631df20c5d5fea +size 651986 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..8ec4d4101d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23142514a8814ba9b325110b19928ac2f7b5408c7686facdd1ce745f2f98213d +size 729392 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e9eedae191 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bbe779d1552dbc14cdffefe11724e0309abfc4bdd5bfbef654640ef2ee4c474 +size 642706 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b83b8e1775 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86e10c8135ffc8479245de07ab66827f37bd2b6321c123758a76be60d9a72d41 +size 752356 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..504b143e6b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eab373e8ae753965711a45ba8de4090a6ef346f14cae5dbd45f97a5803d6b3d +size 663302 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..5b39b9ba3a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ada7601cbf11e0b9b7c32e0d5a811a4871961b956af0e5cb24bcf16bba298ca +size 722240 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..53c684c7d8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26337d05de6302d4654127e781e3b2642e95bccaed2bf0ae5dc43d6839aed680 +size 635552 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f4a6f04a71 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c3ce3b54d498e8ae29ec6e2d964e1c23b1ea7ba19ef3246f1882f79263b54d8 +size 744414 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..847a1ab8a6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11b6d71465d3e0b5cfa94da232953b72c18312034b63b2c782520ab4720bfe87 +size 656148 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 467114e31b..9209791eee 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b9439a3cf3cfea3e38ad461c95614cda553e7f3cddcc8730ed8130e043c32115 -size 747876 +oid sha256:92a8277a25f1ca1e84db3d511f68913b74b05073c9f816a02c41202dcaa3deb1 +size 747974 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 63afefd2d4..8e7ab85264 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:48ae7d330517435c210956f0f0235991c03f5e63bf82391f7d9466328d4b53f4 -size 772368 +oid sha256:2c69e8b678d3cb4c76b35a6d217b3cf237d5ee0316558c2d6d990f91841e001d +size 772468 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index ef23a958d7..3a649abf49 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7fc6568b4383f38c3b83ef3e4771e8393408cf6dcce4144349db8349db9b9c32 -size 749004 +oid sha256:ce098c71bdc54911209cbf62c2e9040cee43f44b9164055808e92cca3c95204c +size 749892 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index faf63de02e..8a31ab90c5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5d48fb0c85491a96cbb1b160a5d9eed0bedf380003ad642ae9a8758e2e016e0d -size 773498 +oid sha256:3bf942128c53a08369ad61d6d46be3fb49a15da10b9909d2b51720e8fa2547a6 +size 773596 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index c65d77310f..8012628689 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed6ae18bbde039701c67cbef263dd1f7b6f5c7d0c1c471d95492137f5fa12aca -size 719982 +oid sha256:46ce8648a29b69029f889e9e7c0bbef3abf51dbffd7574b78a0b580412b047e0 +size 720080 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 8711bb1885..a591bd044f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a9564e0409371d0732c3169a8f1752f9bfb0ddb71f5ee274ae041f2d42df55a5 -size 637884 +oid sha256:36ec038d3a20e5eb1e94fb7169d699dc62544cd3c38134d431f591d9fd0a25e9 +size 637982 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 832a70250b..7d37acfdfb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6cf2512b25b6cb107cab1a6f30dc559e07b287a997ac18ba4e1f560fe75a5024 -size 747090 +oid sha256:5dbc654027b8901914679329eb45630b9e42d75966f38e966f44e32c1cdb567d +size 747188 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 4ef9c1ee85..67926afc5e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:55ec9dd582717e0fa1b0d5918ad0b766140f755d01f561d63a1021198bcd32d6 -size 662376 +oid sha256:35a182478c06f96e54052889594c15e1e4fcaac05bda7067612f18180c6a2e51 +size 662476 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 7f220d7819..887efca077 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89f98080856e5725cf43e99038fbe67eba24ca6c813285bf806db570f725fe89 -size 740722 +oid sha256:7bb62a968b12d3e3ec2b1ed87ca99e3807db1bf17df0cd504fc4c3114fac5d89 +size 740820 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 39092a517e..a3fafc15f1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5276885c0521cdafabde44c2ba2c8fffd33c06067c564e195177932835dc1dd2 -size 765216 +oid sha256:d6cc9760cfe726a2893c785a5ab076eefba3c6ab9a16437fe30bbe9bd5c4364e +size 765314 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 7a79eebf2b..77d6fddb42 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8141a5920eddc8ce1b3acc9183c8a44521689e493a33a31d2d52eeead32f83c2 -size 741850 +oid sha256:a3cc57588c032c44453a0d58dd25a868d1b42f803273f92d529921d99706ec7c +size 742738 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 9b564485af..1741535e6f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b0897c748cd076aaf45c2d124dd586cb1500d9d124867fb5e05126b0993d354 -size 766344 +oid sha256:4b52dc107279f918d66df1a2d1aa566cc798ecac8f16bee88e293b2722f2db83 +size 766442 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index d288c860ae..ce209d16cc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8735867a934e8291832e4fe7dcc6b3004239bd1bc7ff93f3880829eb7cb0933c -size 712828 +oid sha256:68604a673bfd274af641339098853928d520e589191b2282418bce563ef631fb +size 712928 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index c5b4fb3c34..51a4a9ef25 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:abb8c35ad74e22e875ea4903a33a3e020e372faafb1350025fb7c02d0f14725b -size 630730 +oid sha256:57dd19287462d9f2aa18042bd710b55dba9d08c53bd0463edc7bfb231b845550 +size 630828 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index fac1ac5932..471ba333b3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c923ede568d9d52aa4f7693650d071853e8f2af362839f711e18a10a275cbdc8 -size 739936 +oid sha256:f0357f22d0de4a0457b6a721efd52518624754d9e8ff67d90d2abf6829befb5a +size 740036 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index d79b568abc..7eea8f1ecd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:925a5aecfff7e47985deda1d3734eba8b70e8815da0eb97c2865c9e59c911e0c -size 655224 +oid sha256:44872bb39b992fc27a9a1a44c6e9a1e585e91ebe9b1e5fab26c7d533107bfd27 +size 655322 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a174df616e..b61d593707 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f8c832d8f9e94d2451dbea189ad768be8d4168f6d0244b954db48934b956e4e -size 627202 +oid sha256:9c0d4056db5397bd32b27a7d648cb2d3a2cedf55e1d16251e0ea3785590ddd8d +size 627300 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 08c25e909a..c963a9cac0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d9f4ee5e501b4952c6076647aa7b8a5abd8e54d93d3b09f259f97789b973d7d6 -size 583489 +oid sha256:6f96851a6b5eaca300798e8f907b41b55c89f2123641bbc85f7549484b020170 +size 583589 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 90bf840211..a8b775af36 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e961d8e42ae7c6084e2343cc33cc5d2081b100aa08ce44f8a1f2c1f20ba675ed -size 548933 +oid sha256:a710a873b24bad585e107ef54dcb310c6fed4e21306421c9b20d5e0a64912fee +size 549031 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index dabd898dae..1f50bafc45 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2469ad9537bfa70e32258d31dfbe58f64b3f0ebda17a65e800b2a3997e7e1ebf -size 510993 +oid sha256:43459b2bb99f48acc3472428f388acbd737d1f2b9681bfac8919881768f9194a +size 511093 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1e36873f9e..082a5b8b4e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:56646f98faa0e73e804e23f8b6d2246e9edca0f4966e42b58cbf99037bb2640e -size 616145 +oid sha256:865f37fe3b6e1f88847a62f4540c22f02f7b2b01448ad7aa1e2cc35108fe6ae7 +size 616243 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5ae74799c6..a41abb8db1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cc25727925813bf61a5ec17406348908118cfbb5b37df6d75c46ee20a1720220 -size 578747 +oid sha256:fdc6d0583f8f106c0e01339b58b2bb4d903abab9e3bcb49a3cac53b809cbbbc7 +size 578847 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 43dfdd942d..39e28d709e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4550008e1b5eb739bdc9245b01e01564b4341718a2253b46bdb480140b6a2f3c -size 534867 +oid sha256:33feab6414e9aec78f68b5560bd1ada88701f433cbc7073debf92b82aaa2a17e +size 535755 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 27c7a435bb..94ede2fb23 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:40edfa03d9972cb500edb649526c906cfd6876bb5affb6ebb5f7a1b84635e0f5 -size 502847 +oid sha256:e687cca112c8f5646c96111fcd719d4bec533fe5abc54d3dd23bf23562db58f1 +size 502947 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..aae7512d74 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49245373d4a67921f7dbadd8e79862f060a2ca7401784ffe4d5c72736b58cb85 +size 726382 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..45075d575e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69c0c54e7579ce06f69ff68985225463213da3af3d796e2b03d0c7c52277d292 +size 642606 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index c83bbc7c52..57cee3fbdd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4fb4d2b9ba1e426da7e5c3af3a71e19049ba590fd6886634243917ff9af3bb87 -size 620768 +oid sha256:74005afda95e36c0169baac629b9e37b6114425541cf94aa55557d3bf1479f81 +size 621656 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 087f1453cf..89dbc3ff2c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c89cce83176df8e01ecd825bbabbbe9a7e4ad96e54d8536ac9788790c4731f5 -size 528359 +oid sha256:15c7199edca249e4a4ae31fd0064396ef772079812d60c6a2bf2628415676692 +size 528457 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 24ed662e42..9f48a0f835 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:30fa02caa6eed12344bb6ddea2149d71bc81290d7120df6fd53cef88f124e448 -size 577303 +oid sha256:d50c186b911717d6006781be2a65584dfbfb95300c15d8b03e964c2965f35af0 +size 577401 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 18b6b19654..3460614f9d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:94c8f3867d37317cfb42a6b371161f638955e3ebf602c8c40217deb3f05aa26d -size 495105 +oid sha256:2fad67523cdf8bc55f897792b3b5f66e03b2d53c48e18a5d330c664b956dff1d +size 495993 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..60b5aa6da7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e56f034396c7e06e01dfd45e57c51c82839e51905aab6b1f649d7b441347d7b +size 748654 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..05487f3c3a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1c21be6d3cd0ddd978fb7c1a78f792df5454e9772a514887eb20bfed42d698c +size 663250 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 09f6cc659d..89893fecb8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f414333d00e2cabc66278c0623e1decda21a2c567c7963f9135c2a39dfd5d095 -size 556511 +oid sha256:bb360bdbfb0d3c2567e02826209e1b96587ab000954bbce5fca39e4a06ad0f84 +size 556609 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 491a832c26..de07d0fd1f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c28f6fe0c7a640fe0cce30989a107e953e19c32c1de78c916f0f580f02a938f0 -size 453643 +oid sha256:98274f39bb70ef84c9c40ab4110850e46ebaf18f4d158e5ef6347147de552c04 +size 453741 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 4ced7a6b22..7bd42970eb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a9bbeb935c93be04e29f7b46ad4f4a4159ddefa57498f73c611c01ced1e95d66 -size 527401 +oid sha256:ec402ee0a793cdf2cd85f23cfeeaa5caacd6ac33265fb5753ead8bae30665e4f +size 527501 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 84d5ebc726..ccf0cf3c6c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b7776bf35ffe67e530479170ab38a0b0b2c0516cabc831a507baabf55f00cc8c -size 426605 +oid sha256:a19ba5a2d535fb91737b78ee954a97ecf73bd97ec185c02219b15a6635c737b3 +size 427493 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6ebdccb105..1a52d527dd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6be490edf9d48fa7f62d1d75f35a756b79d1937c481181bc22cca898bdbe30f7 -size 620838 +oid sha256:fe26cd4983e148493f7d60d3a1688b4a557c6a7b943dc4c886d2870681056243 +size 621726 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f2b28d25c8..607a41a901 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:22eb202a57dd84891cbc8ce3c02ef685f4c962afd17ad7ff921873a52f1f19f5 -size 576337 +oid sha256:40738394a58fb87a78c516cd5a901cbd6b046c335b9ed37989e71bd8869b8404 +size 576435 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index db7e25f167..f779ad0ec1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:052887a330ba468979dc44035955f9da6654e34aadd214f6e20892aa7c5ab876 -size 545775 +oid sha256:1bba826783725e2d5215198f90b6696620e908c13be5cb57605b1289e7b8ec42 +size 545875 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9ea868987f..e3663421c1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7c48369efd81c1f640960b97a23615e014c09263af7fc9b7b60ae5dd1731d66b -size 507837 +oid sha256:d31a2906b3084b45f20e407dad115de9647934961b35bc1acaf0600d14a7e2ac +size 507935 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 92b4b2423f..fd3ac52e04 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a27f1e1bcb2f0fa1100b1725ca8483f3dd49d2240a1f94e7dff13390123d16f -size 608991 +oid sha256:858fcbf075321a3f7d6e780769b5cb2ee41a3b8bd8e30002c31d02150f9ab222 +size 609089 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5406a5ea0a..32b23d4eb0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d2a9eaffe8f3cfad340e5331b056a4b2678d3944564e3de89ae97d70a067640c -size 571595 +oid sha256:632066f4b01f542dbba06565547c7161b64b617ee6be923ee8b8b184ffea8914 +size 571693 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5d0140f5bf..d0b0233a52 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:80e560960bd3438fa1d42fdfe99eb3d5176fc5fee423cc83e7e72446d33e8f50 -size 531709 +oid sha256:fbcec6375a1c796bfcc26229255ff6bf30b5941a89301c2a14563d641f03af10 +size 531809 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index d204a94bc8..c350bcd3b4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:34fc8439cf417f20d7dce38a940bfba1d009201e0f57707149a3deec3f83d872 -size 498851 +oid sha256:e8f8678062ae69ab9df785e553d3b7c448dddd28183650cfa007166dea9eb7e0 +size 499739 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..da797a74b1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c386fcfe836041b0806bd04aeca494cdb8704b060ce032b72ae1e0f2862cceb +size 719130 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1f4a8c3b6b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bbd0a6d2abf219beb959153aa3a3271b1be4602a2a959103bf0edfa77f3de3d +size 635452 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 710a4ad434..7c9e885fbc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f22299bf2ee6c88fda6f25982d7ef37f7a1ef44696b5824a1ec6ebd6ea3ddd67 -size 613663 +oid sha256:1b57d72712bc8c94ad7452762bcec86df7105b1cba655869a871e57085cfd014 +size 614551 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7dab9cfa12..e75d07d704 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f38b0fa430bcf63b8fc12ceb6db730e53c3105138ca43b4295d9a8dd6f17955e -size 521205 +oid sha256:6b44904981aebf92629832bed46f2f42ba99743e400e0c5effc0da2b16b45a42 +size 521303 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 7d6d0ccc63..a2da0ed184 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02808e83be6620c77e41f190bfc5d813bfb953fee615b0156710b30a822bc5a5 -size 569261 +oid sha256:8bac699acdafcbc9c55da8443883d18a98dd4d3fbcc5e0523d93880af1b423ca +size 569361 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0daead5667..7951117dee 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bf770a33c54a3b26105cb35de1ce9b78ecfbc05c16d724d337e7984611074765 -size 487953 +oid sha256:173b8798325f2597ab432b4a967040cde2bb00f360f17c074fbd7fcf5d83ce32 +size 488051 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..22b7ebcbcc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a872de01b8e978f9e51edafa08ed2607751a63a5ef78473838209d0ed4d7a9 +size 741600 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..de0f207b99 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d78f732de118d9f4916a324f5820020295b6c89fa295ae17f5a496046c8944d9 +size 656098 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 46fb7fcbdc..0e9d496abc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f888a7b020859fbf958b52f1d20fc1866ba010c4ae64f0544551237c65a5f35d -size 553353 +oid sha256:c5d05b092fd14f971e05afdcee43b52e20cf8d4dc27782d74f1e501c0fd3cfc5 +size 553451 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index d6a305b2cd..6a1969d157 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:393f4b0ebc4c42fcaaefc7b62e9d4eaae48255e08c51519a2598eceb4ec582c6 -size 450485 +oid sha256:dd96ed8e6f74bda848865ba5d6a79ca08856ef9dc0146490653cdf953b5cd9ee +size 450583 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 66f5d94390..fa06179730 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f5e7c2a14decd84e46d7e3cfb8b97cde69961c1f70d6052995c719caa0ddb64 -size 523405 +oid sha256:2758066d40091b6f8c2b2eea2cab935a2d3de80fa84541218132e354a4f37d02 +size 524293 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index c62bde7cbf..031d6982c0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:addda36033891220ff78cf690f1085fbb34dad71069aaa27997bf2392de51065 -size 423449 +oid sha256:30e1cd424cd487666b13ea229628dd234ed801b93928d819c0bf5e22bd634962 +size 423547 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3057c44a08..7cd21bb8da 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c41bbf6377fc1cab5181665a3a5438fdb5078e8f00861d35f271668add1cb686 -size 646722 +oid sha256:a14a1ab6eb44f87bed713fc72b6f38094c76aa7e5c21ccd9ad0fbb17e9941718 +size 646822 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ed1d39a4d0..7b9f342134 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:adf7b426970568ca2a612ff60ec3aaa25ea5ec6911f2cdf07f82289f340fa99b -size 591071 +oid sha256:c727b7d5c4f2d4db538d7a6471a3b638e651d2204b3928640acd828551c48264 +size 591171 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9009e27a9f..34ba5d4aa4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:87737b5d6f0aeaebfc9a25a9c67ae54550a4079191870fafa8aca5cf97439b3b -size 569637 +oid sha256:ee425d0d0b9bd2aae4a2e25fbb8835781046ccdce67491972c5f062fdf054c84 +size 570525 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 514c7367fe..fb1ce0a43a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3fda7f2ecaef320a788e0388919f3c49aa1af4f7614f2c30dfb0d81bde518829 -size 531699 +oid sha256:608997efb19eb6a45005e54350ac92c6b46f193c53fbb42d6baa7995c580a2b6 +size 531797 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 71214d59fd..8a72bd9d7b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c905af7fd9df699dd87f3d5153202f2eb0a9339720b09962a42df6b3497e931 -size 635616 +oid sha256:bf501a6b763b597d4fc63c0f590b8a77d65ed1805e5f7a3d6489c8232e9859c2 +size 635716 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 38cc2e49f8..9a2a7322fe 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d6ccc7d8104fe3cac25d7d4ee95a3a1b5939ba28b433d069700652aa47c02cd0 -size 596837 +oid sha256:e697171e9f442b0fa095f3a388292b9b6a0a61f76d60bad3c7e59f03e6ccf1ab +size 596937 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7031985019..f6237ce2c6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fd59679e5950781ce0d952a5843418e48580880933758544ec3ebabda98d0066 -size 557151 +oid sha256:71f078453b3bece51067a22e5d7b5be41f590db7317c46048cf6cd780d4cec97 +size 557249 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8244ecdec3..8b5d6b2dbb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d6ea0c022263400bcd805b8f0a1e0d2d592cafe4e39a5aef76bd6852656d7026 -size 522763 +oid sha256:e8daa86bfe1582f36058a751d1d22b01c1bceaead1eec1fe1d8dde6a8a91f53a +size 523651 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..7f1799e1da --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:471c5b26c7e664a12de3a8110240c379cafb302e20f50bc9b0f4b113545cd014 +size 798392 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0808d24e1d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2989de2b6848d86a7b86c6aa8465d9a1e40252a65c9fd68f29de384eeba7229d +size 708006 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index d82ff834aa..4f26a29002 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:694483b374a6d32f1abb6ea56fb076f9071f2cc4918ba55318153ad1443c2b4d -size 638760 +oid sha256:90130d61cfde8d5c225bbdefcb991066149d5a4226c548081fe3f9d2ab8cfeb1 +size 638858 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f441780e47..7403a80af3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b7c165c15f0c4ccca68b224a7d1733a6988c88b4a991ddf32c585bdc2a12faa0 -size 539985 +oid sha256:762d9f9e06bde083383457449acd52499f3c3878e5ea824cc84c20da870b465c +size 540085 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index f6cf80cb6b..8635ca8a3c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:29eae6da120c4002c2f40ce23a3e49959b416f209c8b01e794d9595115f977c6 -size 601955 +oid sha256:64e8a49abe8df81344dfef2452e8ec46dd03aea5aad8e096b2d6b35aa2e90e0f +size 602053 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index cb5cccd260..562e56a73c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:62692b45238a39c2598fc7b196543bf7cac35858e1428490d247df7f368ecae1 -size 507571 +oid sha256:4f3813e74f25850ce6830ac336a48edb31dad195efa94189c9e7e34cb4a8f45a +size 508459 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..36e182a95f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11078f35115b0a985dafa0f4dcf97848f33cfb6fa80d5b8dcebe32a4b7d9e54d +size 822688 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..c6a8cdda0c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7b5f6843f400a046b42f0ec24e03a5d2c0fbaa65e355815ae2df86f678fa43a +size 730132 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 311ffcf26d..20dfaa9df0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68568bdb28bb75a31a755c4d8a69ebd8137fd6f120ba5aefbcdd91370adbbc28 -size 578399 +oid sha256:df1d4cc1bd002eb3536827b42493a7222fd8b71978bf51c634c7e238d139a0ef +size 578497 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 37684bf4c0..df71d2462b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3d5ef92e7b29d1dbeef8856d2ea7b6e31c7577e18934d94990e178671c6979b -size 472621 +oid sha256:e4aa17f6f4e8b4bc05f833cf476f2c2bd8fc27a3e236e514bbd291bdacb610d2 +size 472719 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index ad63090218..8f0401ff3e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef445cd976838a0aa46b6c59089e9f14a43f4821cfd03797e4229be21397bfae -size 546379 +oid sha256:586cea43ab5c91b33464266171914d888029deb7e9110add3240078a363c3104 +size 546479 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index f079e220c5..b673326c8c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c8e038a57a5ffd7b8e6cf1a3905108aac4a93bc3a8d1a2647a1cd807eaaa9a30 -size 444005 +oid sha256:b8a473af783b55a72e76e10da3e97f6dd90d8f090fbdcd5a58e35d763cf533ca +size 444103 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index de35cc754b..bf11dd882d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cdfc1b29b3674dd6ffa51b95582d80ea8df72a09fbdd7e5b4a9786d531353c85 -size 639570 +oid sha256:5b5cd8654ad0da31cb549cab3eee04e6bd84be3825a8d942c4d21942620c2c3f +size 639668 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a08c2168ae..5eb4044d1d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e864a8a24e7d9511d49dfaabd3a36c1ce975318a4092857d29afd11b84623f71 -size 583967 +oid sha256:22b08013ec9b46b2bcf9df5b2bedb6129c4d84cdde3003e3bcb1116441eff527 +size 584067 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 52fde2d19d..5779e59e6a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b66add01947222bb76520359b5ec1ef9fdce2dfa886107a5c608fdd14413c178 -size 566481 +oid sha256:34a8a2a740495294dad3f011fb5a7a0683750d3c1481b02ca433b67147598600 +size 566579 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9b75728a5c..f96407ca3e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cbf30f4d1dadfecc6c64087faf9ef0137fb9f84e88016ddf02d9e5bf1ced386c -size 528541 +oid sha256:e339de2ecae47517fb511c6de5a953a79f1615adc2ccd42ca4ea56ea7e8ed693 +size 528639 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4195ff0a99..d929deaff1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:060028aed3374e1ed55f222ca707dc438d1f7252d800103bc119bc0cd6723df6 -size 628464 +oid sha256:8ab2c3c9e762523ef3604c51d650f849305fec4c1cd18403566f84d1113ced05 +size 628562 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index bc7908ca3e..7cf3a3fdd8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:590a3bd48d14cee5bda09f6af3b488b3baf1f6837a05570f47310dc7b9e4dee6 -size 589685 +oid sha256:f746bfecbd30ebe9c9f93ddaee2868cfdd7c011f5f061b7f245857e8f3fda971 +size 589783 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 79bd43aba6..39d4a763ec 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:74f404e4960a2c023a44760f07db93d91aa0c54f523ecc77aecb7fff1bca96d1 -size 553203 +oid sha256:4f8ea480d9792daaac8a9971fe234a1cd3149df1442f900c49bbeb99ca59359a +size 554091 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4614d0aaa6..ac9002faa5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e8a835230037a2bcc130136fa0cfa0a7c112a657647351fe6c99ced244c8844e -size 519557 +oid sha256:e70c824841cbe9cf22573b545fa4ddc37377f6692c0c607aafb25dbe2756e558 +size 519655 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f0153e33a9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:508e052a79012caafd275de85a080591abc4abbb90eebea5d7f8a6de8ec69ea4 +size 791240 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..077cc72955 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dccb4aa6256e56a382a6ffcfc2a269578f511b7ba9d30b1c6c4e5768e744937d +size 700852 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index d2949720c9..202fe6da04 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90b1d1eac4dfc22a7d5786ec133934b6fe05a78c0dc9dc1accfd1c3ee57fef2f -size 631606 +oid sha256:d4070e80a66ed0caa70f2ed4cb1bfe37240f1f509827694e17d673c85a8e67ed +size 631706 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7cd7d390a5..aca985445c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a55363771905ab3d2a8eacd1e7bf231cc20b98dfbf2a0bac6ea581c1fffc014e -size 532833 +oid sha256:5640308f0b06861c6b07e64871960f5f4bfde1c071b8dacb0bfcd12934b44c47 +size 533721 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index e7d8962ce4..223fa5d422 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:171ed322b0740e5d5f8803ecc76d365fa52f258d2c3e3dd8eb04ff1821951561 -size 594801 +oid sha256:b57378c81ff9bd32dd638334145458b8c53200bf2cd46359307a609dce0207e4 +size 594899 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f67aa32dd3..0fa6f2f514 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25d1b54c47017295f0f20efba2086c1e6703f0e8d00414712359723f706ce058 -size 501207 +oid sha256:26ccead197bd6d3043f1784e977de823992042a8845e0cf4d6ca46ba90ddb5fa +size 501307 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ffe97fdec8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dc4838204c1f5fae7f402bd6aceb388afc43d78101dc67b7fba6fc6f4298406 +size 815536 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4e4e0e92e1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2964a9cc1e8669dfe33e11c6bd3dda6c7ff838cc4523e9a28f595215421139fa +size 722978 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 1c6e243b1a..2ae74dc87d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:19de4a6d72903608bd20ce5aa7d0d872c6aa5f3a73905d39588f9ac9d86b3505 -size 574453 +oid sha256:e5ef718c573091d3b8851fdc046870001e94dc76df301e6de93d40caf6af90b2 +size 574551 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 02792e8aed..e83eac78c8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:81e5fadd87d395d94f005ab95e38525ef1387857b9f4aae30371f40e35de34b7 -size 468673 +oid sha256:0df5b469991be5ab43fcacb1e0a3a8ce3c547e1cd43cc0918a04929ba601ddb8 +size 468773 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index ec62b7fd0f..266483ab99 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4e97493af433551d62e8f6bfd5d49603e0dc19b3679347662f3be71c32507b72 -size 542433 +oid sha256:8370c013bae329dabab38ce61399cafa83250de98f34da91c406b3dd5a39f036 +size 542531 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 18a9d5a8c3..826a8d8db5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9cbef3564c3289f877e388044d80d0adf917c234868455fdf94fe1525456a238 -size 440799 +oid sha256:c188648587e1d14ef871d7f8315a9e9a6e43d0ed9e8e0aa09db9334ce65b00d5 +size 440897 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a8916b4e64 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edbcb66c49d6f3db3cd0ad5bf19def367d83fc371874ab1f8be112f95c5f3734 +size 614541 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..8a853e4e99 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2773667b5f452f003882c5417fca83d29d05a0a3a86424a759b6aae5735e655f +size 527905 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b3273d64b2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7847c20a22db1f2d226ed7d6b73011f9ecab9fecbf0e120a706a3a7a70d8f5b +size 615131 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..71a2f87c87 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:042266b6cc37ec020d84139e7325908eafea233959b610d57a0b60338f62dec3 +size 530419 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ad45ace85e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3efe56e8197069bc1866c9d2520e71b9bfd94a4c3e13000b5c5285db1b037f2 +size 682506 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1bb6e79f49 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:934609b3763b95279ad5d2d58a41d0a89be7328a8e4a906b298fa27a3715b0b6 +size 594389 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ba29aec819 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdc54d804ef9d8f6dffd35d6d02c18be882c8620685b83cfcdb404512c422e19 +size 702852 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..63dbc0f147 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e6cf40bf9ac06b75ab55c56c759497d603a56ff403f26aee615cd8344c23d34 +size 617447 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..922da83e02 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9b10c4af260c5c09ac69efa9b4880f6d3bbca9b33941cefbd1c442577477761 +size 689334 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..497d2a2122 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50edd221e687f32d2783472091bbfc67fd7f8c293169c73023c18053604bc67c +size 603979 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5b4bdd9e50..b02598c6a9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:274c0b3e2db0e7088f2453ca20a9d148804a271bac815bf63d165ca847f38722 -size 725770 +oid sha256:4db3ea0715cbf0b6ca07e36909adc02e348523637d949181d41b8f25590bdf1a +size 726658 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 81c22b84e9..ae8ce40bb9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a9d3d095e6f46685846dfe0b847bf7cba1332a90351c90c4212e43428ca2c4aa -size 686252 +oid sha256:f367686c8c8c03667163c38a3e87f215e22c036faf81a1684ca4985646e608e7 +size 687140 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c06e1dac57..a2bc562b29 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:350e37db776291e90a3e416931b74c0874f09946d1d22d873ef7204a401de8b6 -size 708990 +oid sha256:e0351f2843fde47994e6f0092cdc2a371d0aa459b3eb153ee02da89ed89eaefd +size 709090 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 672c74b001..a1952bc718 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa898617105053e4bf0cf4d28345caff9920f080c32aa2747c7df741cf6e226d -size 676774 +oid sha256:4b111f04d93d84d4e80260e529a116be995b2061b6f7dbdb1e165779ab497f09 +size 676872 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b19027a934 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7c5245aba14aef51321403ce4a50eae389718eb2d2c82bbc0a866544436f562 +size 704230 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..273638df7d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:864485944517e4a0e75a85d90a9598eaef39edb03e32ad8f000a4590e0613212 +size 621392 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index ed333e877b..cbde8ee92d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:da3d6f0397e25af0838c5dc1196e8f9cddf0811bfd6168fc968559e79f5ce0ff -size 719288 +oid sha256:72bdabb7231f4529fb598241d4fdfe12fb873f80d0744bf3757dda503af19551 +size 719386 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 68e450a16a..c21fb01f11 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0745e006bff9062f8f7e80da4a450c574da8ea512ec6b5ddff5db3624fd3a954 -size 625496 +oid sha256:2c16c4fc48dc4d856236294013f817fdc09affb38a59efa1fbb4d38ee9681eb7 +size 625596 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 4f4e5220bc..21f774a8e3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:80f50b00eb29d48ef8b3e528116ea906ef6fd89718dc423dae3618d55fa18fbc -size 670198 +oid sha256:70bcb5116aa8848b8a317ddef7c456528271bb2a31960433cbe9c6b39502308a +size 670298 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b8dbd39d61..53aba0f55b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3970c22c53ddd3803ee5872108951d6be4a6c8eb9769ee69fb22db3589493272 -size 589431 +oid sha256:dd15b501c14b9a7e19e2f63715f80a23cf7408e4a72bcc44bf7d3bc2553d2241 +size 589529 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 458c2d90d8..b169679ac4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cfd1adf0e73752bd4707a573fcd5ec1e7fc0a3a279e66054d85adf9297c617bb -size 712252 +oid sha256:1e638a920a775c002333d459ec513ace53e8db28fb9c7221f09157d1ded39777 +size 712352 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b424223688..3bef835d39 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90b37b4c61607e71706d432e8f1e0b4e7cf5287456a240fc8ec682f19e7d34d8 -size 672734 +oid sha256:9d042a8c34d61a0077d2d7f868c527cafe81ce8195698094773005cd31cedcb3 +size 672882 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8e66fbb94a..4fb92a4a46 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47e57d1d9efbf84c822531824340436b2892171b3602b338a3b31bd405284696 -size 695474 +oid sha256:eb82b51c60a7ddcf53393f33aa3d85bb957f56743347a6a2cd4542e902dfbf33 +size 695572 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f97ac72a36..680d7d2363 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:87893ab090c7b5138ffbcdef95ac79c9c80359aa410a70b2b452ebc791d20039 -size 663256 +oid sha256:a0feaf002a925c80c99b5c88565f2f1deb8446852a85b1dfc9ef2d4961bc341a +size 663356 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..99aef664d2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f93308295544bf66a8ef14e9da64af268b9b2c455776579e7896b70b8751f63 +size 690714 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..90adc55779 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1501c69819fd6909575e11c343bf30acfa6935892b502f3430e6162647ad1ab3 +size 607873 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 1086fc9540..17198ae38d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:29090cf15e3495d1eb0b085dde9ffb7e2363e3124c3223137f072a12324b80fd -size 705770 +oid sha256:d7b62bdc36b50b6c5c1f54e6fc363203a29ed84ba53f0ab4b9ac952aff2de18f +size 705868 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ce54cc6fb3..2e5182ca27 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:770fc54399ae756bc8c94e4aa6b81e18375f78fde308e094c0f0bdbdc83c1dc7 -size 611979 +oid sha256:2db1e68af9464c78fea572ec6afeff98aa0f652473bd1fd7d832a8cfbae1f674 +size 612077 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index c5dbd943a7..bd323cbac3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0377d19fc7b82de00ee35783c42aaa6a286c26d6ce8d403d05615d840a04f545 -size 656780 +oid sha256:b3075803a58662fc1dff6b16bcf8415051534a138a382f432a8c812638eb52cf +size 656878 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 95b89eed39..1a5c6e6f40 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e3b331691ef4319ad5f81269da7ccb690a9ac9b40a546e9cee0f46bb02b202e4 -size 575173 +oid sha256:f4fe5890797ada5d6c8224515287a02fb33c54aef862c6a45e6ebc04d3076a7f +size 576061 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3f450c4e31..ba3780dfdd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a7eee0d7426ec6eff6be51fa3aff2cecced74be676a1d5db1c2646af599c5ae1 -size 745390 +oid sha256:fca3c2e1c3d929ccabf4c46e33771e14e957615badc42352fe365f75eb8988cd +size 745488 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0112831e7a..2700c61a7f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4da9c61db739c17dccc13f95cfa11a8032a2839c922cc3655826a85277b7706e -size 705822 +oid sha256:e35522e15575cdb6dd2fcac09451174b4bca70bf053c319f146ce852d5a493c7 +size 705920 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c2095751f3..2a0e097ea3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:57a376b04e0c38e730d58d7fd543848b4e794dae010754adc26529ee6edd554d -size 728610 +oid sha256:58be79347aa2069b88693419dcee97e7bf1b621426186d978a54d961135da3a9 +size 728708 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index cc7200472c..6196f95b7b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2b582277300633b4dfccc184c90cfe8292edd0f51bd67251204b20d26bc07b65 -size 695506 +oid sha256:e9c2654271d045f19e10a7966435f4fc5f234a3dfc17ad02c8ecd1f668abcbf3 +size 695604 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a76c9f0aad --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd75b3e67c343273b825e4bf534f400d06429e29e03b398ae8ee1fe8fe4c462 +size 769928 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..82480ac720 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f04d80c6fe7bc5c1f8c9b807ab145bc77da5b6ab4005da76f17983f93e97f89 +size 684080 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 7cc092f31f..6d8c028018 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c39a07b584ba8877bf1acdbddbfaed19f53c8b45b05ee525a9d6cabfa616563c -size 736440 +oid sha256:9c15521c947fa71f87be5eb970934d2a8339eb48fc93461c91c8440966389de8 +size 736538 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6f936faf4e..d7df19129e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:302b250358053e0fffe180369952ca1eef9bd2968ef4433ec2cf26859656969c -size 632930 +oid sha256:c0780fe74275bd124377c96e091f9b8daffd35d60c9a5ead1e224d924911b4ba +size 633078 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 78f4666895..cd95d3867d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6b0719050c6d3e17b020fbb7d87b4e623c60779225df5fd84914d9423a29b747 -size 695540 +oid sha256:8bf3b1adf6ebd12c913f3be01cdd7f3adac9afe26f20bf6ad6cc5e1c46c56cef +size 695640 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b6210fb71e..28e3f1c5b0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef91580cb9d9239727e335a24c367c1399dce56c890b2a44610c61701a940543 -size 597703 +oid sha256:63b252433a4003cb2fc6c291f09bc033f6d55439fe8e8a668870e49849e18723 +size 597803 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5f10943f4f..a9d1d8b901 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6fce29fd2b910f469c6202e0fa6de9f1c1897a1e22a51598cdefdf0af48744c -size 731922 +oid sha256:fcb8476ce38d739da4f42e7337fce1aa3aaaaff761ae0b33afff129224b7068a +size 732020 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c099b60220..b27552ea76 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27f9b1f6939eeb72692dd06e8e589489df828cbd4aeb6fd94732e84013707583 -size 692304 +oid sha256:c848893cd723daba66639034e2ce160bfac667b6b2234b18a08353c7765dd8c8 +size 692404 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d8eac9d9a6..8341d6d10d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:29bd3f4ba86ed89f1bd10afcca08d003dacd2405eba5e20d83a9d864a9cab05f -size 714304 +oid sha256:b8eead072fef6ac532e99114a8da546c3ebe8bde2abe24e2f582a5e2059ccb20 +size 714452 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7c5d495031..92d866ecae 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:31746b15ca5ee700a8e9882f57ff032edf16da1cdb24c2eaf5f129a139c2ebeb -size 682038 +oid sha256:adab2c6b8f87820865306268e54e887c6b678b9f6ec9020a7644506e7a5d7c05 +size 682136 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..91a3d5b23c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a2ab37e3b4fb714027cbe55064915a5a2692d7e6bbe9f7f48d36040ec45598e +size 756360 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..f572786a90 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c62b41955b8d27cc5b601698342a9fd9993a46ab0a7da33f02a16c22ef9534 +size 670562 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 667290e5f7..a9381b7212 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e1bf3a5f8fc9d2483b3dc9ab29265f961ae5871da71656fa24c34090028f90f0 -size 722972 +oid sha256:5ed3dec47d597b1e5be6a9d778b6f08e1d5fac50dadbe72694e58d5015978e6d +size 723070 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 574f829f66..0dbb47b199 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3fc23a7847b9f183b6afe2579096fb77c43293f57627e9b9b9f68712fb5f794c -size 619462 +oid sha256:d99284afd066e120e044734cf0115e4fe0e85f155e308f55331b3bfb445c0f79 +size 619562 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 99527a9c8d..312b9a3282 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f18b2cfeaaea47f5e7333317a4b4a91fb3b6103182358d8c15ec2e0928dc056 -size 682024 +oid sha256:45babbe196c30b5476a2c3d4a90c41f3e8185cbcf7d3955d149f4d4e9bab3f0b +size 682122 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4c96bb9d08..1de1a7909c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:18c71e79179c6add65649e636a4346ecdddfa989ab288c78c1bbeb73b7a99d88 -size 584235 +oid sha256:72af83b8b030f4863776fbb254ca89b1ae16fd321730bff8ae17a3269346a1bf +size 584335 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..703fb610f6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02ab2b6cdcb6c03e16ba3a9dc9062677fe151b658e2cae7f74a1379fda62fc48 +size 611333 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d465a1d369 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:519ca0aa75076532605165768d55e50dc5e6a7a6fc0d105232af292c8e351232 +size 528593 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..8e3fa7633a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a83cf0a8f712d3b89cc57b848dda3cfae33165c4a269bcb72e63be8be89b16c6 +size 636420 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ec81f2cfcd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1337aba42aa311c3d0ba07666caa93547ddbe49c74cdcd5e2a425ad000bd9c8 +size 551755 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..0069fa4a7a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7838fc7f76ebe9a6f8a46971d06d1c4e5e2d986fd37b6c8d7338f6f34a51bd5 +size 609851 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..159986d1f9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5653e7d511d6108f47d94575ed8bbe1c4b4694a1ad124a63a6ed592fd3d4cd3 +size 529825 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f8a888850d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4f85d25c1741a1a006087ad5f0a9eae68823276279449d804516a6192eefb5a +size 634888 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a61319d153 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13b693c9dcab8e154fa361441805bd4c9dafb3f70939826952fdff1e39b797cc +size 552049 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..53671b4e9c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0dd28e5b8f77daee479fd7c9b855cf64854cee2f45ae465933c7b4391c1fdae +size 678016 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b4a56856e2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b51ad752c0da3fcfec10cb27a0d08c7de0d881cf90c4c4e5ff94a608969747 +size 594781 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..eb2b9eddfc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9cba23833eb51caa4383fe6da7e7b28c5c225bbff0cd0793c6867144ae9571 +size 702854 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..36460b22c4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fda37b267f8f6569049c95b03c419ebcd5b1ea8281c4b17f544416bb19d4217 +size 620706 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..60f7b9bea8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd85432573879b12775fc2969792939977d70f3a83aceaaa4f05e1af6afdcade +size 697670 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..c1733486f5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31a169ca15dd3d1eba38ec9cf85fb1eecb02b84c66d7c363dc838ffd37668307 +size 609255 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..04026dd571 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7ae36ebda4cda5eb989f63e049fd2c01b3f701630eb332aabf224661a89f6a +size 722310 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..cdcc3c62fb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e432a727016f186353cd910895917c48d960ccfb987371d63ec96def622d19f3 +size 631430 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..9cce1ff108 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae0eeefa850d51799b272e4346864b38ead684c669f9271810ecfc8f3c3ffa9d +size 690516 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..c9aa8ec3c1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60ad41e77343d91720b8782779c0325a975e6d0d1e501b41679b10b0b88a458c +size 602101 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a6975229b5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2faadbbc63bf32e81992b32b7c60f005246f30e7c4d6ae451b79c42b00e3e363 +size 715158 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0589305fc2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55392c2a9e812b784f2b8185139a43aa723127a5cfe9ad77d5298f9df258e5f2 +size 624326 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index be405b4e08..f253b591e0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad4381f34131213960ae4fc2769ec6e31048fb1b74ef11c1349a4b55375e9f4d -size 689166 +oid sha256:054baab84b7f0475446614e2248f88b3240c267672daebfb182c27e0838f38e9 +size 689266 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index c0abca66b0..6cff8474bd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e68b478cd91b8c7acec8c76228190240a328a693f452760e5a94ebdc8ee9ba07 -size 715238 +oid sha256:b1538780f70ee0693c58bf49d4bea50da56df5a6120d055e0aad67bf2263dbcc +size 715338 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 740473b300..c1495ca1a4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bae2233ad44d90970e58df78326278f88e4dffddd7141fe721f1e3f94d0b5c8a -size 672092 +oid sha256:6b6d50221721322dce50495617b165feea7e49bb46b573cf2af182121118ebb1 +size 672190 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 9475f3f9f3..0eb322d710 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fbde558b7f0c5b012ccc71e560d9b65889194761c1d46f77b5e3f3a80c9ea20f -size 697424 +oid sha256:bf4cf0ae85e673e20ef8a6b8b36c3513ba88da858f1bcdbe159fcd2d39a4f92e +size 698312 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 9abdc5eff8..774cb75141 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:585573d4d31afc354717c1fbac3f16ed48ef422ca51d8c048bbb2af95685a9c2 -size 663494 +oid sha256:501701670b312d7bc8a0e95d198213be1ed96566a104f55565c8feae492345b4 +size 663592 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index a81aeb66e0..1b406cefe4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:92a1705610e4ac46c0b62e03c8a306ec9fb463e84afec0412ba0aba0696a4025 -size 578681 +oid sha256:9434c44138bb974dba851dcef8fe325faf3db97a6cee17088fd05a67fdd41d66 +size 578779 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index d4f7bde743..920e6a8851 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4439961f23729f690e7e70d2b150b4414f6551db89939f2f1d8f2aa8b53e718e -size 690552 +oid sha256:f9ad579b2db0725627ce63acd1dfcd9f25208812500816471cd25fc2f8747549 +size 690650 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 753f9f943d..3547987290 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ffed06e2f2baf8604f8aaad88b1b87eecc455f948437fd7ee626624fb166f593 -size 604753 +oid sha256:8981d28886573dbb278dad0c3ffbbfde5341639560c4f940d396637d61cda454 +size 605641 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index f1111c7e1d..0a0092e79e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27d08cb7f61d2b56fa7d865ec8fa9182e19d2625e8d3ec7fe19e1bdf16159b9e -size 682014 +oid sha256:505d469bac73988090028a20ebb5f1db0e347eca64ec72707aed0ab090033964 +size 682112 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 9b955d09a3..6f7e6868e4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b453b3ca9afce5a4967336da794aa145972938392cea23ad3d31e027dd2ef8d4 -size 708086 +oid sha256:bd68e68c017c25d3c5170dda6dbfe7099442079ad188e9110abda1917c847690 +size 708184 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 124083ad88..fda5e38462 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:971ceb2a6ffe665682dd292d46a4708a66bc76697edd4690479a5db3df73a734 -size 664988 +oid sha256:6c98d90217bbe20e17bffff551083c3cc4a280ab6575862d8b995b490d3320df +size 665086 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 6a401957a9..a9c1653f92 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e5ddd8c18d35a3abb368b0074a1ce3f71f04bc83f2ab56151ceb12fbb05b1b86 -size 690270 +oid sha256:fbeb658d2279dbe00916609023a113ba30f52895618446e78aca5c54fa850dd1 +size 691158 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 9c1298bc92..5266e5dc76 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:098b41f73119f4223cb9ba41d15467b539489e8af75e69f04136a84ee5be8bfe -size 656340 +oid sha256:4a146edc58580613c6b8f2d6978a121660686da509ab77738bc2913465522c88 +size 656438 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 7f26659442..a552afa44e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9bdcc72cc94b9b9f4afbc7e2b706567dc0934392fa5cf1a7d26c1116f25b2fe7 -size 571527 +oid sha256:5ece6172252719bc47cfe859777b93d67c16910af2fdbb7cf2ce47e6ea7c51f7 +size 571625 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index e58bcfb2a7..d1667fac13 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b3a1bcfaf157a15ae298e22208e270317ee7823a613e335805a07e36773c11d -size 683398 +oid sha256:397b0c04e44495b7fd35e16bb88f80e5b8b7553fb1d57fadc5b80dbfb8c473ce +size 683546 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index b6301376d4..d2fa944f21 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8d3c94ec32bfff2a9b5cdcda8ce5d59f049fccbadb4cc90816dedafa14630ebf -size 597599 +oid sha256:d7013424c333a406507a736f336e5355fb6fbbc0b20a2ebad8ff1894f322a77a +size 597697 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8fcea5375a..dad6a9da75 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:653d0cd15f3b5160839cb29a006f51247ecf1503cf6ebe1999f8c14e9c8af36f -size 606973 +oid sha256:ec0a7ed6616180b2aac69b595ea397d52078f267cbe3a347cb37647b48a21804 +size 607071 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a193efd19c..636fb7ce95 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:11b9d0375185c20987026f7dafb4d2f35542215b456a5ddd697c05d4c96e812d -size 583685 +oid sha256:4fae885122d633118e132571125e4eace9e5c085455bfb413ef884deea8d842e +size 583783 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 95c6b7f2e5..ae8830efd6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff445dd4bffae7fc1378b834d129bdd8d9490ca2f5ac6b344ffe0058451964b4 -size 537387 +oid sha256:10e7faaf5ac936d2edff628b1c9a2696cceded0bc1bbfd70e308f4cb553f4ade +size 537485 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 30258d30fa..6997702e65 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a6a5aadc4793bf6384b4590df40467edbf1f383ee1ca6ca0282b141da64d3782 -size 516221 +oid sha256:caf1a7e724946bad873ffed4106ed84dbc0b8d90c205b8564a6b1ed11c89b8a1 +size 516319 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e9abf49e4a..f6c4739201 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09f5cec758cbc5c5d572ade323bd2aae43c199583512da4b2a2e43d7c8bec01e -size 599419 +oid sha256:2b0088851791d43bd3988f13a80f5d5bcd6cf969735c0601fee662236d6eb4e3 +size 599517 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 53d2b31f9f..b34fdb8313 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:62867e5db79b88281e4583b978d6429c2ddc0c99ea34e6de696493e76debb552 -size 577365 +oid sha256:85bcf9b07bddc0a6c1f3103b3261bb2216d8649338f7dbbd86588ce016c04fa3 +size 577463 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index ba5289ce25..1507e25e39 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:67b26e396e0924e7a86541a0affd499ff7a267504799ab01f64b34e65ad000db -size 528107 +oid sha256:24e26e17f921ce127ea021d44316de93930f54e48559f015becefab558e16be8 +size 528205 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index beec6cc542..2a1761cf2a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:51f7647655e66c2a971c18c2eab46675d67e11df1b96cbfe63910b201299e666 -size 509111 +oid sha256:cedf092381c4764d26838f69b277bf73d86f28dcf526cfb57af84d0bb8a8d666 +size 509209 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6ae9f0e262 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26cbd54256fa48d654c46fb1a4f78ed5d4bc42d393be193efbcf5f1d56eb64c6 +size 692784 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4ded6dea60 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b05e0470e667291b84f3cfe8b1fa4985a8f5cd976a7634eccd068fe5ce0421e8 +size 610043 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index a833ad33aa..7430f20682 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1b8b988d503bc79be49f5fc62331c73668dd2ea25873eec26ab4e6d163f78d3d -size 604091 +oid sha256:1f290282f595fcdc4f6345556da4d3a0827ecf03a411a63367d4270b2d8e8603 +size 604189 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c2e19dc638..1ef0ab9e29 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:552008c188cc875efa0f905c768fae1e5f4a5f18b13682a0896a95dfdaee5c45 -size 521351 +oid sha256:a3f9315e73c94f05221b210a15048b7ccb96468f407bdf5a82067b44b665adbe +size 521449 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index de5d085a6f..1621cc627b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cc326fea62de3e865cb860c2fa2508a12be43563c9c4e84005fe61e1e46fbc22 -size 578485 +oid sha256:2375ae286d5ff6a35770966bef3760c6fcd00c70f7c9ac1ff6f63973c12c00b3 +size 578583 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 408c81838c..e763bfe7e3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1120e2e0920bc43d870ad1b975d9fdd8bddc6cea455056957742b5591cf34a73 -size 495893 +oid sha256:925e0e8428f78072eaebe3a86f4cde985b03c6a7e7aa0384d805993b0da48fba +size 495991 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..0b71a6c656 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f3ad5fdc82f9a2926172dc6dc379cb20a30af54e70cc104277e8e5fc719ab3b +size 718608 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ad651e90ba --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36a8cc410a15a8706d90e3492041a512c629389bc18240195d11d8d84611a60d +size 632268 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 3c26e9adfb..a6ffea6ce2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3f21bf3afb185f4303f41f5d5059d1163367d2e4d0926265cc559d1cae95f1fa -size 557495 +oid sha256:0ac8608b0b15f0ff83af887f1eaedebd1b62373f2a292b484567909b8418dcf1 +size 557593 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 67be6b5566..c25bcc4602 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d2a5186d943baf4f33e7ee526f700827244394e1095801c6fbca4edfbf425c54 -size 453641 +oid sha256:52eecc401dd9714e48afbf1016ff6c3eadc7aafd7c022885b4656b805c4910f5 +size 453739 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 8608cb30bd..deabd09755 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:22b00a70a19f22d238ff1ee2275c4945195311617aa9157eaa1b48442757141c -size 532579 +oid sha256:5b84aa03ff2d37d89fedd6d7c5f4dfb06c297c86ba78a7a5071170ad9818aa99 +size 533467 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index dbe5d387b4..735daf3e8f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eaaad065b91c8c04782766caded5a49025bd820db2b3bc759d97db392b4ca0f0 -size 432671 +oid sha256:5bd3652aa00d504dab4d6bf730e2fe4c13d32b1f1a2bab413f0076a17f085472 +size 432771 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index bd1ef7f19b..0bfe5553a0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3c4c150b81cb303efcb9ac2faf501322bfe49381a0d0fbda34186ddba91fe903 -size 599819 +oid sha256:44280f45eb36cf4047eab7f88dd5088dbb56b30fc3604d51181244b96e8777f4 +size 599917 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7ca9e694b5..381fb460ed 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:428801fb82ceda6f9b20c5dce9de89cc1476b288a3e3fdef2d395856e8fcab4a -size 576531 +oid sha256:ef6a98a7edcfa1167fdd53fdf798a901668c71a2b2a7093c659dacc3ed6978b8 +size 576631 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index e91edd02a0..1a0d17e3d0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a4eb09344e238164d5f11962be5ecc2a6949b34c6bc0bf4f99b58670aa76ce55 -size 534229 +oid sha256:3d73a31fb3409bf304e67dcb72c59f5e1d17d102d2994459ffb49a99d65dde6c +size 534329 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 66c1f528dc..db90a30cef 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:040a5c6bb4682dfe8b83ff31c018acea8ff058a12b31dcaf4311518ce7fa6592 -size 513063 +oid sha256:ab3e466077e9f81812a209ed31087a56b097a9bc7c5059cc501379d11729d2ad +size 513163 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b16e617db3..42b11c62cb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e9205874854f72980d925b9db95fdae23554893b43a1652e88170b9c5e378d79 -size 592265 +oid sha256:f4c8c4d72f8cbede460e663e48e59fb1903625bbc22ccb24e5281c095c4e0151 +size 592363 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index fb651ffb0e..4beac9b02e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0860e5037ba4e5e34915c8ddd19bc72debabb78e3c45c956e02c366c2ef3eb37 -size 570211 +oid sha256:958c69f4fbdf624bc24407b602cf16edb9610605df9dfac93023528b4a51d25e +size 570309 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index d1b84185b9..bff9fc870f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:547a8ba5c98a127cbee509b16254af907f2abc051c59f6ae79416fae71c52df5 -size 524111 +oid sha256:42a5415379c755eee6d6c86404828cd9111ca48a536287b16c9525870e8a34b9 +size 524209 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4d7a199057..686a70f648 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:437c721d3632e624abf97e53c52ff18ea634f96fc9549637c3bb1902a54cc810 -size 505115 +oid sha256:2a4a8db884bebcb47e0032db7e86f2acdce8daea184aebc6e20a4bca0fa52d80 +size 505213 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a9ad8f6063 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93ec7db08be4a8c071111118bc6d21913ebb10a53798147b390dc7abf85f4ba5 +size 685532 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7e70e5e96c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3f36373b446be84fea3c1c052133ad88d2896de04aaa4a471b16a91241de80f +size 602889 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 8da0fd4400..63116e3f80 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:45f7d96bd19197ca80fc14bdccb5b8c39e84ee7845c31a781a5c6ef919739fc1 -size 596149 +oid sha256:1afcc5251a01bc764c8629c39df9994c9d7c083a825254ff526cb910058db495 +size 597085 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0e1fd84606..851c2f3974 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ead59b5d4b7b65d0ba8b5c03fdbe97561d4859c065b68d6884dc788ee5a4c52c -size 513409 +oid sha256:80796e66de25faa50a7f756a57c7195a84d497329df49d9368d837417b8ffbdc +size 514297 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index d5a094013d..8d75f55c30 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ade6277bcaca97b1e14574fbb49c1a86d0a86e79c965336e6517f43813dda74 -size 571431 +oid sha256:9fa7e70a0d80519ed32d83910d066950ea8d7587e987d124f5d26b72aaff6640 +size 571529 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c6e3a04fd0..eedc654cb8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f09b185395bc709460dffe4f220e7e41f677c08fc43e04d9960fa5bea99ee42f -size 488789 +oid sha256:4cf61507d915c1240b4b49da6975ae98c13f1c9de22cec3757d6990952c52cf8 +size 488887 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..0cc5987570 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09c7287abc2ff49a80c5f2b186034ef8f2316698815a085a2c8b3efa41a3348d +size 711554 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ca4438c2f7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8ff391f39a8c3156b8231b521fb81db739c100df5d60a3d1e1c43ad040d02ad +size 625114 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 425463b98c..8c6d67740e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90ef307d5575bd927cce8a9373d645daab29ee975da83e380143b450ac1dc798 -size 553549 +oid sha256:44c4c1c6e72893fc86a36a128194e3b5af71aed2e479e941d1c2f470481e5a46 +size 553647 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 42b87d9129..9a3cbccf4e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25c15146b489043fef5ce1bcf68613aacd2a7b411e3e5ce34ef7f97547d0fd42 -size 450483 +oid sha256:1f8a1aaea6c477e369b9e30033dc2de5d21705f81b86ffe2162be308e28e015b +size 450581 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 8fb9a67bbf..f48f13cd1c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:577648ab88938e35990b974ec30ad4eb1de3d08017f92b081fdc60d217e1b1f9 -size 529423 +oid sha256:a2eaa8243c396077897c518512d250b72090d3e5ac6e5e4ecbfefcb3a364e103 +size 529521 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 32ebdb482e..613a2c745f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:585c4ef0eaa4c4d934b058471233b54b1fa7f64e6ef1c1104aa7254de35cd6c7 -size 429515 +oid sha256:adf833ced22c4cd35b35d12354f1bcff816cbc787838ef4b85737dc865dbfd26 +size 429613 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2450c88a6a..76867ca5dd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:784bf083a5d63d1cadd617a189450f565c82bdee78473526e0b4b78d4e4c8bf1 -size 627234 +oid sha256:376f08bde5eb19f8da5320d50edd33418d2b2561cd934446ca86a24bdae64c0a +size 627332 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0cdaf10823..23ba965474 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:60d1b538ed940f8fc8bf04ed5981b88370380fba6db2fee8d15cd0c8427c13b3 -size 591809 +oid sha256:2661eccdfb1db03281e9c931a8c49766984e2668c568dc0a4f4a715f2c562ff1 +size 591909 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 959d401e73..5b2d1bea01 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e032d7774629071fa85412c8b284f3135fa7f7fe513a352a05c6d6f4b326be6a -size 558881 +oid sha256:215a262eddee329c557ba1a457f99a2478bc7fc581f67b6a562124f41e10930c +size 558979 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8d33ab0a38..95c2aea924 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f0f6e6b155b6e2d844f89a271905147c4dfb57a206cb4f1c309243f88b1a87d -size 537715 +oid sha256:6583ec017ce6ba928807e66ed7ab4f4bd5fd0f2104ea724d7d0e9bae8e9e6805 +size 537813 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 40b86e69a4..42ebc35171 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5a1269424c3a43ead8429f988307c6c427d99e7a01021ac796a6bf9d88ede999 -size 618102 +oid sha256:0326d1a7698a65e597b8c3e55603251f6e949ebb2db7b70439d0e26e01fda8b8 +size 618990 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ff9866f648..2c2001f537 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01d4798912cacadcf38182203152be6326f87c6e3e0c1c9dc417d2d8f27ad749 -size 595257 +oid sha256:53e94a4906bb4d6340ffb6cbc2fee45e537230609aca9faee3aa960497077459 +size 596145 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3e6a7e817f..8ebbdc865b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0aff9adf669e4f84c7ca3585e1dce5fb09cc629772c33e70bf7957bf1020c4d7 -size 548811 +oid sha256:138545e0fa605e3fa32bd29f463681a02889e4911b3f7b5df090cfd38f099489 +size 548909 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 80dc114ba2..0420afd06d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dfa967cc95a176f535d8168bf6c9964ea545623863333d03d67f080124db131e -size 528829 +oid sha256:fe4dcc27bcf4fb9de2a96a221d14810ae63e8381a334d5746f839f1bdd4c8c26 +size 528927 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c4f2aa2d78 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e03098cbb6cdd063003b34db6a6b185d96c4d10ebd7820cc3bd68e95b2637d19 +size 765880 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0f04e7545a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae68c516f801326152796e424c16a7451e0f13a2df6f2fe700377053a294dcdd +size 676382 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 468c0479ff..e4d4ec38f9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3720087363c7a0e9b58aa040f4450748f5e32e305839b1406d4bb0ea0b0af04d -size 629928 +oid sha256:ace86f92633aa4ca949887d9956408a2cc0992a64a1311141d651c2f7990962f +size 630026 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8f6f06b6c1..4a678eaf1e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6edc04acc5f36d8e9442712342f3fde2644af00840383a35b769f79d724bc16c -size 533127 +oid sha256:737f28f0630e6b7640e5b31f1cbd0817768cae4561d0c26e72844b52f5919146 +size 533225 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 12e3c2b04d..d3f4955fbb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a06bb723631ee1776f7e5f7b7c79a16a826a21ead1be9e911da0ba5471510453 -size 601163 +oid sha256:c4a6c3de3731c2ec3de305bc90dff50e5e628528299cbfc72dfc9c437defa068 +size 601261 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 477c9f824c..baf3e10bed 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6357ea8c3f9abf6ffc8665ddeab6f0ecbaed2d9ff8d221585b6a61a55cadef84 -size 508457 +oid sha256:5155d5d5699aebb1e3b0512e1488c651eb1d39b67ea24e14c6811f291bdd909f +size 508557 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f26a9b0855 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d9a5416a5786da540f73adcfea6810d0b2ede6da741b491ce7073c6a640f3cc +size 791854 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..9896e42486 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e097b06ea9b598cf354f8df5649336b13f833d3ff04c88e7a847202af5127dd1 +size 700282 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index d6ce933499..2022d3c78d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90847362d7589c736f08b93ae0988d8a8d13e4b648318dc07ee5802b3946051d -size 578595 +oid sha256:a6b9275fc8e2e74ae6aecd12205c7d7c5ac482670492a76d677ca2e208fef8e9 +size 578693 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index cf526e083b..05a6044937 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3eb765b948c759a6d395522ff1bb882eda25d892ae179bbd84190a54bbdaaaea -size 472619 +oid sha256:d9dcc762665446e992f37447bc9ef4995f5f21100d451d0987b7cdd6327c335d +size 472717 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 2998f7daae..176c665cb3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b8883cce0fea0839ea190edc3d07ae9868381ac2817e2a192e8502936e2f3d7a -size 551705 +oid sha256:ef100dceb3b00cc59b6698c8e5646a70109626512cded3efc305211717082b9e +size 551805 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7ccd9efcdb..c13ad3f2e8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bf32462a5a7f48133954155271f53047e23cc2a317cc9126383ed10f34f63593 -size 450071 +oid sha256:d7bd15ef4d5a501d72a1f75ee85a33ba4b177cadd1d737005519a1bdc1cbbb39 +size 450169 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 84c6371ae1..d3edb813e6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad17373d61f495cebf76597a086ae8daee81db530e488c24e7b4269ac655d60d -size 620080 +oid sha256:083e662d0fe457ead1e395b0bf091c5492ac09745f3149fa535694292119488a +size 620180 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b07f4b348e..d573063c07 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f8bc394a0dad9b58b707058e143ae2fc57cabbbcbc720ef093baa32f75565176 -size 584657 +oid sha256:fad2df9023ec05688e046a47735047f1e641f4ff1dc5c3540a7255bb78a3449d +size 584755 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 259507d5fc..c42dbd39c4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:62c175eb81eca3246ff02daf45d823b1dd4a66b31d30f985e2fcfff4a6133b39 -size 555723 +oid sha256:3a6a5b7e100c23da3acdd09afe31b98518277268cc85274c04e14dc4d635a206 +size 555823 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index ac86058f8f..2920f8b832 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:14e08932262e54f1d0a58ed73fcc6720cbc2419bd1adf72aa1b4151ebb7a9b86 -size 533769 +oid sha256:b93ad98a196dc53d540abe77b9cbd8eb3d20a3df902bdcf19f8b80518e26145e +size 534657 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b94b8739f1..860e37f4c5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c01828a96e2149d58b204cc96159a9a5a68fecf4816163a06538b2a186b5e519 -size 610947 +oid sha256:52b1ef058341ad8bdcc8a87fff09872afe09e7ace27854816261e619b8bd6ea8 +size 611835 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index aad8075316..f3edda8b99 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:77cad6283cadc6aebd5db974ad6cdd991a1991209c402678922f1256fb1616b6 -size 588103 +oid sha256:01290337dc88062e1561f47772e410a5ffd8f0929ce6f8c9f4161e3251a52918 +size 588991 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index a54bb307ef..c696dbb5f1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5cb8b9a96643e54ce837b922dd593e14aaec6a52b0dcf51f909a8cd8e675daa1 -size 545605 +oid sha256:a6b9cb63bca1d2f71f0267eebeb889f1edbaa0ba5d9267cfece332ac732198fe +size 545703 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 331384aff0..675d119ba1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c91febcef0d43f4c2c1793b5dd975d4edc6c9f3bc58db0e0a2768ecbf4c6a72b -size 525623 +oid sha256:b1c8ff9cd6bf7bd3e345e2764b22a91951704cb1ebdff25fa3f7b802713d1fb1 +size 525721 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..52a5be8600 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05ae20ffb49543ce5020f180c4302b8aaaebb8c637d72f8a0aa8c8a7f9069e15 +size 759516 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..cf7251277f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:747ce99ca9981c558b6bd23a93b81b0484f4ad448a6c869748655a383ed3c759 +size 669228 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 1acd4aa17d..ff65522ba2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79dedd01b1e5a91a90eb1986c9efa7f01b8eeaec5ed1bef7b2d9efb1e6d80377 -size 622774 +oid sha256:6deca514c943fb51009cf2aa778a39c61b93882d28b35934039034b279a75327 +size 622872 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6d8b6f67d8..714173ce3d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:97bbfc240b7466433189748da96cd5b2956b1f995b1c6fb6940294f4e1facf66 -size 525973 +oid sha256:6c2f71fff5085ec13201164b49f030c18b037c3e02ace180e738708e82785b87 +size 526071 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index b374839465..ecbf928a3f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3f5c7ee9ad82588615cf5b0312aa7bbc211251465da658936fc40d41f376561 -size 593221 +oid sha256:b178e1668dd3f5faf866e13df98e331945c75a9f70a0c8b4017ff01a10bf8f89 +size 594109 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 604bb0c6ce..559e79a010 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f169b44c67f4dae0089e8fd81923d1996cb2002b30c0d7b03d70b2a9955d961 -size 501305 +oid sha256:f2f719d89b406379259f90813d0c5b2422cabccc87d218ab15078df4dc7d5d73 +size 501403 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..1410c30921 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8935733059f9c4e1f2eb7bbf7610c416d0677bfef94e64d93892bb6b958a5167 +size 784750 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..52e2a939f2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f61968eb3c3190f7fef123e9b33405e5477ef22131a542d4a611251825b0a906 +size 693178 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index eebbb75963..75f80228bf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee7d22493dc355b43b61602490e4dcde9b96f1bb080b0e7fee3ec6ab4b968820 -size 574647 +oid sha256:7f65708d4786a50db478aee43842ff668a67756ed07976b6282f3063ed4d3863 +size 574747 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index f470a5d79f..e444f075b2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8009868f2e8156ac022b9f711ba3cc4f72130b62becf2d822a1b854650741062 -size 468671 +oid sha256:fe29c12d9fbb84aab87155f88ac273be656e083d5c6bc3b43e76021f8e955685 +size 468771 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 05a13cc42b..08b8d96ba9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ec2930e6536893c64a6f91ea1807eb99fec4874f2f1b6003b52aae3fa9787a3 -size 548499 +oid sha256:0a0cd1f528ed4d87d215e4535d742e1d2479fbde9a69b2c037b85d5ddb41eeb8 +size 548597 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index a6e036c5b0..10bdbc9cea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4732878fd196654743d2a7d8b565ab9f98678c6e6d8fb732cba587e7aecc474d -size 446865 +oid sha256:7fd0a45e7dfd9cfc1dea49f8098a9fc84a4b00db98195940d18c09f43e2b2fe7 +size 446963 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ff42fb87a9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76703deaea412e3042ab63ba74b498330bce42651fe727e545b36facdd4476c6 +size 705908 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..944245d295 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9881321169caa32e485c6f9fb993f985913c29a3663b3b6f05cd8fd4a9c4096c +size 618432 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..84c6a1574c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f30a3b46f17997b25858e109db46a7a11b291405ffa92438dc297cf787a8386d +size 732572 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..302e0763ea --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a744dcd85d707a82e285f792917cb6db333ff30655504f582ae49baa9ba9652 +size 644750 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..fa4c857e20 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cda8ddcc32f179f870d44a5caf6562bc5227a0f57d78e649f48ee00401976b9 +size 703192 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..3988a32a47 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cb2b5e9f0388322db64dbf75d635608a22e685bcbecea1e15ab767e556de54f +size 619318 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..02080e1dab --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca1e3f55ab06023c9a2971a0750c58342c919632de753491ecda674f6bf88e99 +size 728278 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..2306137890 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c889eda6932b2e832e26367b8411de919e2de7b81f247d6cd4414deae647289a +size 644452 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..8c071f4467 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4623c8e0509e65d0c62d8162700af0df5b12cf8f468164d5f081177f3e810219 +size 772934 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..3ee81e0db6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48572d6d92e90d91498eabac21e4ce9c57cf5f14dff9a00be555bff6a77865df +size 686050 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..faa4a04551 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ffc763e31ca59653db4639b1bcc9d1d8c508a410ec2fa23f19eca3845f753f +size 799056 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..696df6d6a4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7672d56cdf091adbb8722b95016292b5103e14baf21653e9af64e9ba40ef40c5 +size 713702 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..935b917729 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:091373f12682c8722df1779262afd6669a0dac3fddf4c7547367641a51b84e2c +size 806896 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..787dcf6996 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1997c3dbdcba458f130bd5b3a59c3781b1dbbe6650c64fdff9eb89ee3db70aff +size 712512 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..20b1c582f6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:596506cf9bd42b908c15f61dd606c367d24a2867ec23d6335c652a3244a565d7 +size 830402 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b247a2b2ad --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2db924c837f5033998f6f7eab5e06c736a48643907a2824223e844cfa66974 +size 735230 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..3bdcef15fd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad0c7d7a151123036ea35251bd5226448e8db0cb385a592cf84a5c67358226e9 +size 790566 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0f39163480 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31efac3946749cc4eb8171cd9ec45765597fd3c121c3d6f1316d859fcfbc0df3 +size 695542 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6ee1d81367 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55d69a256e6657f504e07122221a9c55ab693dfaa6a9b408973a10619866c6a5 +size 816342 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..454f811c20 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49c8334d73c2aec1d15022d76e7aca7fce3b3eb606c53b5b74910cc677807261 +size 720972 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index bf3365cd0b..1da39fedb5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:730115ac243e6482a5f18147775d5ab2147345cb75920e0f33fc8bd8a30b3685 -size 794840 +oid sha256:f96b1b9ec3cf0600ee0f1cc4dc4e5ebefe8fb07397320ed2511852a0357ce7b0 +size 794940 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 8421293d04..d219312377 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a9db32716cb97f784457d218207f1bc08316b985c319a51a43167be2bfe0ead1 -size 837094 +oid sha256:b6eed922e12035f6bdf536c423ce96c33d79c38397408a8d62e422d817ed7845 +size 837982 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 253e354278..6821853a0f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2dbdcbc624048401bf67b7063c64d8975e271997f91cd34e58fab28232076744 -size 798090 +oid sha256:7e9ac9613ffaf5d2daf6c5141736eb1ffce84297cdf9308ef135576374366879 +size 798190 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index eee2c9ca7f..09548c4c28 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32b3c118aa06f59e378e9caabadf116b8f5ce404dd2d744c0d6faeb43e598438 -size 840394 +oid sha256:fd26160158f644f2e87f58f89d20f31eb76aac87406a490ade40b34fab589c33 +size 840492 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 3e91b86d41..9877dcbc77 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b5c66e89f62fa9cff76bc2c5758f5a4fc0a21e9e4e483080b4c03618f3586660 -size 772424 +oid sha256:4a74e4df74c7c7132ee02290bcbd7702eff1a54070628b388251974744c1c241 +size 772522 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 0d87b51f09..3ff45cd786 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d38eedbd3c67d8988d51fa9d0244235ae9e84b153097aa2e2aee8c1795184d7c -size 684898 +oid sha256:167e4efce498ab269074c575e396d15f72845f7b0487c98ffb6e2853b1b0d941 +size 685786 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index dcbe741462..ecc8d0b185 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cd2a9f7b4293ed433dabadd9b811cfe99c286c0b9b3e0d12bab81cecda1a70be -size 817144 +oid sha256:98f818713117ba94eeffc7cea73bee7d980dea9a3eccef4f0971401b608c1423 +size 817242 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 2341511ddc..a385829fa2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5243b9d7cd18285aa247d25ad44d620455684abbe832bac69e2f268a7d1a02a8 -size 728040 +oid sha256:16354885661afed48f897311cc29c51f3b9d6f0253402a6c7d4d78ead59c98bd +size 728138 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index cd64f10021..f3cfb68df8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3d01fe036e86ce9553ffad4cfd680062d539206543621a78d28f6d9a110e6a41 -size 780584 +oid sha256:6ce80e403ab21f4d04693a68ad5f2de0273e73b3dbd8d3c856d2ddd4f538ae60 +size 780682 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 4cadd55d16..829ccbab16 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:76119dd30f5361e7ecb9eba304213759609296a9f468441792dd92dbba99e8e3 -size 823576 +oid sha256:4155f1353033a8c7266af9ba09c1becc23d50f98b435c6c0a103c4ca065b5161 +size 823676 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index bda4e3a988..a187d835d9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3426a6542e37988d12e40160480f31b2ea7f730979f5b2d273e1de67392acefd -size 783834 +oid sha256:d26e1f333fafa1cb048878478e9e74b12a4a7c6f8b3917d485f24959c429be6b +size 783932 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 7812175339..e2c7a7074a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a93a589dee5bc7a57e23fb9c7b2a26be78e3ff5e0df080f8bfe842c811eb8d2 -size 826086 +oid sha256:971efc746990e015583e01d7365fc70176da1d3c9f6c42173304a7f4314467e4 +size 826186 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 1346ad35f8..c21194345a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6937285060e56f386054a2462005ec8baae1d0edbe2357a49678fad7015e68fe -size 752690 +oid sha256:b04972767e836c2a2be780a2be08c218f290e6efca7fa5e1e73dcfa069d89b71 +size 753578 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 9e07e821fc..47147834d1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:17044cddaefca6a60b9c249e0ef95ec3499ec34fedbe5ee1f4d8cd3e13469706 -size 671380 +oid sha256:82f2249f71263d11af91f737908c6a046afee36e23f1512cfa41abba4f7c49ca +size 671480 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 3470348e5a..8570399c3c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:750fb11808684c8937c63b2bb9aa1463945b1ddcb0cbaa0b0e4fa14fa2e82a28 -size 799778 +oid sha256:6dc2606e4d946dbb68b7f7590ea9f336b928d9f4c7fffde2a46911d97b877129 +size 799876 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index c1d96850a4..ffe0a52625 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee32c9d5bb07b84ca66edc170f4f0a232db93facac9eb4978bcf698db8f5856a -size 713732 +oid sha256:c6b65e7621f8899b23455a55c1688d3852fecd84f5c062d4dc6cb74b7cb6afee +size 713832 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8b61ca9c46..adf883ad2f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f3cdcf6bd9c0bfcd8afc51210bbbd9db8567435af2394f3be9d8eb8c477cc31 -size 664054 +oid sha256:0bd44096b948e8a29327228900cfca0202ee5323f3fe2e1d9674ea9ea6604b3d +size 664152 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 51fa4e76b4..9f69363457 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:abe5f93b8a8958466f772bfce5bbb1ae31da33d926ccefa9b1e6e1a23feed179 -size 619702 +oid sha256:0a5a54820623ab817cafd49dd9e0498fd5f463c26b3294f3f4058d91492d242f +size 619800 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index a485829762..ddae3d21e7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f0217ebb87a78e0f71993186d6985e11a6c09f18d48e3a8663cb753911546d6d -size 563931 +oid sha256:5ea597dd5ab4c8cd4884fc6286187508735f56b02f5e6d26346b463508a869c7 +size 564029 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index fb89c313a9..be31a0f35f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ad06b8030aff261329aacbeda730c9c876dec14a37c787910fea508fc33301f -size 525103 +oid sha256:0fbd20197b34c9d424a5908ea81f5932263c97d7b039dd1b9016d7644437ab01 +size 525201 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ebdef093fe..96dbc10fcc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:39ea69729a451167d0adaff2b8cc120cd153b67bb16f852d468cdea16c1706fc -size 652208 +oid sha256:1f1fcf9656b90187b54145ad40c0beacd1451e3a0997c40b277108811370e77d +size 652306 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9174f02f17..aa45784123 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f2846c0edf5623772d65d014efbde8fe5f090db2cf75888092962f0d542aadad -size 614267 +oid sha256:458a948426e48d0bc94f4e3ab56c9013e045ac595120c031e496d9faf407a19d +size 614367 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index d678bbf44d..ed9f7846c0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c889d815b684be65fac5d2513ef5f59f30f30ae91e97cf43049a8ec6fdfb671e -size 550653 +oid sha256:21506a6c99cb870dd7b32e2b249f7c1e06d44abb2501e2701afa05f09f2ffcb4 +size 551541 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index e4cee9c573..9e10d3d77e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:066caf44e7deba0bf52910fb73005536037021e60287bfb744fbcfb1529ee95e -size 517007 +oid sha256:043ddf98d80fad691a009d43d96aea20079829027aeecbf94a1ee08b30c9c93f +size 517105 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a06ba52820 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bdced08e981825bdaabc213b2466f6eec45f2e11fc5b7f1ede4044a34399927 +size 802602 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..89a246f6a9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bb4cbbabf9c10fbfd2803c8bb6ed905182e67f7c87afca21dbc24d486f6ce0f +size 714780 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index f5368c0e4f..61462b6832 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b8fa149288b049749dcf6786e5d70058ca080ce577941873d360228c1796833 -size 659496 +oid sha256:dcc212e38b8c4bdafc5918967a90bfb25f889ee065d6b40e0dc5fc8dce8b4d93 +size 659594 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9d3a6ae8ba..0964d799fc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8b5f744f616b5665c25e7a7e493f94fc82fcdb152301ff908ed546e2b872d7e5 -size 564667 +oid sha256:00be51e48b31bce0c4a36e24d4c8d933ac00839d74a6f62bd2d5c357dd9872e1 +size 564767 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index cd0c97870d..32b24dafb5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3e803c017c8479326709d83939da44ef7195ecbaf9b3ef2e83d0451838066579 -size 611837 +oid sha256:166b7f75baa1158845d5d2c249eb734249c630b4a9e47281978adb311d4cad8f +size 612725 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 96d9f4e58f..a896a28953 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:75f7fe548a84c5135ebafbab47edbbf7e4cec248202e4beb1924a62baa740ea2 -size 530429 +oid sha256:ed4eb1c1d8a085b299e74209c6966e950b55b908e189d07f87dbf344c19070dd +size 530527 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..dfa2e5d764 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a385079c2c43cb44e255ee769cb59748cdfb57db17c26c66b2489215ae149e4 +size 826108 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..24937d9252 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a54013d6a7eac7de675147d96ddebd4b75b5b9b8b7fdc7515403d90c0336bf9 +size 735770 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 258d2f18e8..9dd0f1f188 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32db5375ced30c4e7f42db8d118823b75cf78d07f5bf78bafa8d16f3e8371873 -size 570817 +oid sha256:f8bf55259890bae0ccac80be724e77394fcb350bee0d3e0ebda74f6b180c5030 +size 570915 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index c7e5bd9635..a7c20beea5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1ad98f43c16a9d919c8b079e5db497f9d180f0d7e93237f38fb14bc07f127cdc -size 468591 +oid sha256:a6bd1dfeabd9a2d5a0c42820828d059c90c2e81d47081d9df368e03407043de8 +size 469479 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index c7aec6c1c7..ed2fd1ff55 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd14b205279cef66237d16487b2fee30335fba5a387ad6204dbb21d966bc6bf9 -size 539241 +oid sha256:8161e8725c5ecdfee1502a4067ef646ad67585407685dcdf5b7310d1d7d2133f +size 539341 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 42a0b4099d..f8bdfaab43 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:77ae216556b6ea1b58cad605d63be58a11b79794537adc2b9254f663f272ed0a -size 440765 +oid sha256:bbff22586e9bdede2bf9eb07609f8fb2d441cef95f91dccba6883c5b89c1883b +size 440863 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 446bd376c5..790c0f001c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01a570b0bed5c54790ce45024c8435484d86acc31bd1f83e24f0fbbf669a5201 -size 649796 +oid sha256:86b06611121dac52e36c6ba893cab6f0e041a82358aca87cd5d7464d20053f75 +size 649896 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 10aa8c385f..8a921f11f0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a5e4a649749a912a33c95bba4858d03df144fd32b0440a333d79b119b5bfae5b -size 605393 +oid sha256:487494fc844e92cc840fbeaaee8c8ead99991e0fe3b6fec320fdc231f405b4cc +size 605493 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index bb7dc0b07e..ca2d02e100 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02907a344260263629352c857d379a490cdf226d6dc611d0c0268ad4b4ff0f45 -size 556827 +oid sha256:e64bd09388bd890c1d43a4ed8e9d58717baf119eb5db2eaedbd24e505a24d738 +size 556925 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2e14461a94..8e5809f02a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd3d413a7d2c83b8c538ab43734d7e130b0516a8b46548c1c21df969ed319e9a -size 517949 +oid sha256:b3d4ea5b58648b165c3ec95e3ac4c895dbf798adb0d2e8cc09b46e169bccd3a2 +size 518049 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 724b5feaa9..6095b2751a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:31adc39e6cbd1de2cc30989d2864be3ab48d60f0d73db2c486d78645e587155f -size 637902 +oid sha256:05cfc819b644d616fafc939da11d7e098aebde796c1478ae41d422095361b2aa +size 638000 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9150c33b2e..7c2616b81f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f470ad60d159d6d5a1f7854aa9497495a688f6455da2ba316e955e78e8829e99 -size 599961 +oid sha256:694f0d93248813f9754b0a7b6b0c0b5448fb6fc6669986621a37d7b6d7de7cdb +size 600849 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3511ff88c4..feb9ad1c5d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:06e195ec865d704a8ce8dc1f84cc2c84de03fcab68175bb5b9205cd06fb66045 -size 544289 +oid sha256:265003b367359df5b9886a4f45d09829f8f0326023ef4487b9131f15cab1f927 +size 544437 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index ff8f692cf6..7c49f591ed 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b28041ad538cc4c8f5b2e4c5f8a99d2558d2e89136faec75d35197e935ca7162 -size 509853 +oid sha256:679f2b0010c411add0fe65afeee351837f8ea377a2730b701d83ad824a1b1798 +size 509951 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c27ed18150 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eefa439538c49f426a4ea42fc5f26c05ccbb1ca4bcf786b6192bdca22aecc47 +size 785138 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d06362cdee --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0be69794e5c793f884687d4c2a54c915c3bb147b379fbfe943630210c04635e9 +size 697218 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 050aa31c37..c5c6f6ce35 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:420c52ec132162f990a376a9af442e1b5e5d85c718c2b9b00c2fc5854e1a40b2 -size 643610 +oid sha256:bb59b0b54cb541c1e995e6cbd37f0c500391a8043afc0091fed329787093e788 +size 643708 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 56e6e37749..ea138dc6a5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:044032625ffbbad5dfe1aaea395bfe49c965e1f957730b3cc97fa00390ead518 -size 551151 +oid sha256:e31169582b9aa1ef51699b9ebbcb2a751d9032278dbdb479ecb393ffc8690820 +size 551249 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index dc8a11f18c..c97b6579e4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:956f1d04f2ead9bdba3bb89e3213580af1340ac0026fb9bbce52f872772f6edb -size 597431 +oid sha256:945cf225a426afc1831fe7dca4045351b137b2c69860c29baf6992b957d5a2d8 +size 597529 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f6d42a332a..13bffce68d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e736c73a3ec49a61225418e6cf3dfa56b66e66cee0cf6707e3a46e6e144a3765 -size 516961 +oid sha256:3ad5a910738365f9ea09a24bdefe3ad2fab0e8fa54e63fd6cbea9ce21850aa9e +size 517059 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..2f278bb232 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f97bad51386eaef49c95e9dc16ea4ed6846dfc476d8f7012b9c887047ccbdf9b +size 809730 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4a522106f3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88fb465434b5c28cf602c7af6ec42c6f759766991f56efdf8a655c146ec2db9c +size 721464 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index e14d27ba67..bc72bff18e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:87b6d1add7dfad632942a6dd96a80d2c44af4448abfccd0e57c2cdd617f7223c -size 562677 +oid sha256:6bf3bebdca8547e72ee575c8c8f2d7eea691d58fbf99d1994f2909ae87a75755 +size 563565 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3a212288d4..4527fd123b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3c1a47a43543e519c1190bd9053f48a98f6159b326f1bd1b79c9cfb457880489 -size 462227 +oid sha256:ff0d7216c3a0b2437f64fcbcc77d03e989b81144a05b0bc41cae519c9cdbb82b +size 462325 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 493efa6e00..841145444e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:86df0cd8b2790aeb2d3eb168c905391e24421cca134d927c54fc2be85b0ebfb5 -size 532089 +oid sha256:db3b061c1bd6c1f115cb335811a07fc003a392a2ef1f1825866a864353edfd13 +size 532187 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 92c4d1a71a..ad8d21cc48 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:feeff0a432ac8eb939c23f8a1e2296340ac8f3fdee8a2f78f7d55a2df6b06a55 -size 433611 +oid sha256:626a3121522f6ca1169c9044746bce40a92dd625d136ca5f28131238e16a5fb6 +size 433709 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d9b49d6b41..b036822a0d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bbe4926255c76ba26b926ac5a36ea7fdf8f7e8a1b1b93013f710f4e8543ad877 -size 683970 +oid sha256:838cef6aef26a39f346d24de668908cc4142913e5c1148c82e012a6f394b3dd3 +size 684068 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index da89f28f1e..3e10ff3ce1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c48d37332a2e7c3aaad28ae9e258b24363f476d83ca1a1327bc7ad53bdbd7c1a -size 626444 +oid sha256:e20e80d2adae109983d4ebfda6aa28c6ce2c033585e9e1b222103bbefa39130b +size 626544 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index e5594d10fb..9f2de0582b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3bd393d7eab6991d038c0bf3008d277b387c10186e95037954f40dc38e35b769 -size 585425 +oid sha256:ee647739192eaba6cbb9f484f7a1613c3efd3f5b9e068e5050531846fe9fd36a +size 585523 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index c5d543973a..83094a5fbd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fb54ae77447daa7c35e0fb02715c886c7d0ee6d5dfb5310f8f6b6d3519e999f9 -size 545907 +oid sha256:4a642b7056334b9049427fac7b608fe32e3a37a43840c16f9d6cb421aa18df33 +size 546005 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d39e386411..c86d2650f5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3815ee18792ee9904c7f503d2a9f58be1bd9489c21df008f9e1028270df3d0d5 -size 672074 +oid sha256:f996afffc0de1bc0c5487075ed63e98d5e2cfd33d19cc17b889f7bf2443a8f0c +size 672172 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1a5d3fb434..63989faf31 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f5a09961d0a1fb4b7ac10ca0d9c7e09018f50f1b69166f3dd574a2beea76c5bc -size 632358 +oid sha256:0cb855b579394a2625704a5eb332e7d5ec70f7538e3aaddc118bfa202fb30a65 +size 632458 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index f19e11982f..e933d3cfc8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43fe622dcef8ca469ce503acb4d342c1c5f282262fe8d2e2fafb82d613b4056e -size 572147 +oid sha256:f5d274d60b69522874663bcbfd21cc96ebe79ebdc9a06be0e206923b8f248fda +size 572247 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index be2288dc6c..351184223c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ebb6adc2af8ae8ab91611c4d1ce7a8fef8b08eafabfb240062df52305d7596ba -size 537711 +oid sha256:1a9862daca3af141cd0491958d2ac013211f6e9547bbd53ba5848eedddd4c38b +size 537809 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..8dde95745d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd699c536752cc1ce5c10913ddbc09413cfda61de9cb4d175ca9bbb6a27a377f +size 876290 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..73fe6ab487 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fc6c227c8edcc27cc2f9af24178cefa3d3f855ad281bc93e18bd1b656c0fdc1 +size 780032 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 774078cd58..a5656b188c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cd9ff65621fc2cec7aef240a001c9791b034331233f4be3df4641f55aead35ad -size 676500 +oid sha256:5689f65034c4176ffdd856ca6dc50f2a79ef497628e2475bd32c4d22ba099e49 +size 676598 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7e24a439de..779339284b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:752172cbb4c88d91ed609d9f26fc8580f49de97eef0bccf19f64e6bd15e7ace6 -size 570671 +oid sha256:e08783fea94796c8b7d098ef1abff97e683cc176c1d8c5ce6cdbeb1bace6cdf6 +size 570769 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 3f5e90a3c0..a8cd2b84ce 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4387de627cefaaefe8a1d715027eb2f02b203668a9fb2f9b328195c395be0792 -size 638166 +oid sha256:944220a8eb0fd4d879137818ed8ff3775cedf6b48664edaa3df78b85512d655c +size 638264 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 201c02ba2a..67b81ef9e5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f5bf9684142ef37c75e2336f6864e64766f2b50bee4dcf757b40b4d5c79e6ca7 -size 536579 +oid sha256:dbd2839f649d6de77439391e552542640bbfde91551efb0fd367c41f12598b56 +size 536679 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..5c99ecc454 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d40367b1e898b5096e5c9ea21fe8f2294d65f3009188f18dfad2efc8025582e +size 900044 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..28aaef6053 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83de111bc7bc4ba2d7c796c14a107be099122c829d40bc4016321ed098f3214d +size 802948 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 959347676b..0ba93774c6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:06854c21a2dc8363cfeb1d8a617edbe21027ca3578041b1494a3aded67dba096 -size 591029 +oid sha256:180aacef509d87ff098962efefc7becf36e786201f23faca40b265a2c3c00f00 +size 591127 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index f03820a89e..780854c944 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f06d7393ffcac3c44f0409b58e799328816379c21e33d98250420f387d71340 -size 488309 +oid sha256:5e716c2c7fe3ee8159b14fd28231ff1332e490cd26333c3c33c5c16e870d5752 +size 488407 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 84993d69b9..96135d4d7a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de343a031400047395d4c7ec297b3165b38721f92200467d9ed83df8f437dd48 -size 558269 +oid sha256:6fdfa4f5f3972ec21ee9689aa402d2dad367aa3bc424eb43eec20c851d47af70 +size 558367 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 835b4324ec..b33fdc04c6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c47932af0f55cdad93d0eac299f4ac1bdba20380b67466b1ff808e35eca8f7a0 -size 458163 +oid sha256:2f6ed80e77438e82c3153ed6db721cd015f59c0b212d82b0c6de6d08bd6965bf +size 459051 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e9e360ee63..01b2a123db 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:35ada42694db23eceaf9eff21e402d5623d749cac69dfcd94da1a654d4543baf -size 669662 +oid sha256:eea75f9c707c21c682a0ed1801a61d3bec5533e1d769af93d1a3aa842a61d0da +size 670550 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 51c8e21418..21eebe97a3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0154f586e5e54e0284f0be27fb06d0ce01722e2bf57c3d1df3f01d68933f4002 -size 612137 +oid sha256:2f7747ba47cfb13b143fbd43f50ebf0a7e04e1560d5d072ab4d4c68ed9cc8216 +size 613025 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 68142f07b2..2d07ba2f21 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:94ae612b68e7e3194933b23ab327fdc7c53a66d184e920e23a2262c9936469d5 -size 578321 +oid sha256:cffda317060b02692f4812390f8d3acb93a0d47c6a7c9cdfe5960866a98f605f +size 578419 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index a6354a330d..0f832991e4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2413deb1faec044f287115316bfeb14ac8ca2e5ff2439221794f69e6ecfcd1a8 -size 538753 +oid sha256:6a7d9c28ba4a3ac79277f98b7ae10fb26de1b006ee00774f9f1e2b18d54a01eb +size 538851 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1bd3eac23c..9e27ad2c47 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d2c1081f1ecd210f9234e69a99d9676ae8b7ca69d386b526de76e272150e28a5 -size 657816 +oid sha256:5b05cabd9fa1b4af6f13f9c6c9a106377c507985e0e36eeeeeb0c8d495770d64 +size 657916 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8f1f861e5b..1eb7053076 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8d52f4b5a97ef4d8ca2b7580cb8296fb4ba138c85e86b90b66999e329cab6ba8 -size 618842 +oid sha256:168a7804932ec2b2285cfd283b7b3c48c35747d26d97000ed5fbeda9b5fd76c9 +size 618940 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 72341614bd..8bd63436e0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:81437e8c6e7338ba736f3b567d393493aaa7f0ed40ee4e73b502dde948f3c17b -size 565043 +oid sha256:bb1061c1f78eb97cbfd6db485a699680b797e65a9712072c6729e114ea85ea0d +size 565143 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0f54c03723..113f3e589c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:85c6633b8da874feb3b790f0ad5d1e3119080b8f649951c3f06a0e3df72a54f0 -size 530557 +oid sha256:7459e344d608f09bdc26e4c62d793dd67006a3d5330dcb609429bdb57791165d +size 530657 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..4575198073 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28d48d233f7f4938efbe8439c8e6a444d9878c22e8cf866fd6b603a1793ff290 +size 860010 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7629d0f8cf --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c61fc2d28242ea73045ca50f3b670f7c8101ff926028309bbef6b2c8a8de038 +size 763260 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 0100e85ef5..8cbbde13a1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e84d8c3942cd05236057df106317e34c1767c78b8bff53875a70f87e717dc88f -size 661404 +oid sha256:b95905402906a623c4b5b5d4b09ad79c0f44a68c6a85911e81227f924777a14f +size 661502 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 882e04a164..a746d76474 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:feaa2f59d5abcf496ac30f16ca99c3bf4317bdb52cab4294aecde932a0af45c7 -size 556365 +oid sha256:a654ce4da9714f98039085904f4b0fda0a63f8f4971a8faf720b7f7123a3d503 +size 557253 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index cfbfb96b9c..9014cee4a9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25e34e513ca4039335abb0068d879bae716f7eca965c00d09d20b0059414d52a -size 622972 +oid sha256:7a0fbdd4025b1aaced313382f76ef9c9786384eb0474ed908fe65b86d4657149 +size 623070 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5d220f0a5f..7af440489a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:babd3350b57bec1e3918a5acd00568849fd31269e24fb8c0ff8ddbf568177de1 -size 522323 +oid sha256:94dae12190c9185256de6b72790a1743585671d36b68d23027689a552c196b38 +size 522421 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..231cd95125 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96b54a0df4911b5088f21137609481d8796f253879df446ea436b0ac36965dc2 +size 886772 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1fc13ff3da --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f313ed1bd7fe826a63cf5f9ccd9ee453ce4e9d22798c0f63ecde046185ada93c +size 788690 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 411673a3f1..194f0c4531 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f2309ef5e255958cb08c569f3a0907a1d48f8334a0ec17e3bdda424e2975d067 -size 583875 +oid sha256:a482e39047c4508c45fa98cdd3c986b35e4643bae4f660b16fdfb278f0881a3f +size 583973 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2bd807235e..35c91afdbc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fd85056e77194628af12ad0282cbfdf74abb99071e0dc8e15ba1b057f73b0826 -size 481205 +oid sha256:8101f17aab00e6a8a197f167f7af518fdf615d27f10016fbbf00f664aeffc45a +size 481303 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 76e708fd36..ff05197e51 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fe8916da0221b0711c114f89419489dfb8d731267cd5e5799f3878effc0aa3a6 -size 551115 +oid sha256:9d9a2d4d5ff60afbd9b3cd1b91219096e8aa2f4e5b0acb2bac7221af2aa6a798 +size 551215 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7ccb665c41..7007deb539 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed59f23f409b27ad92e91b18ab70e1beaba455379149df4264d8be75595d1547 -size 451799 +oid sha256:7567ecd14d84a272af9879bc1f347ca1bbf46501cf7a46b90b8286c89c26e217 +size 451899 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..25e433d30e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc45ec5c53716157a0b8af7ca839d935b1f2f3fe4bab8d6201378f4bf958b125 +size 643106 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a4f845c660 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba2048d6ef41f6122b1b7706503d03d9460b11d6c9033e58de293e7ef9d5b64f +size 545121 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..956449b721 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43dcff538396acdf65a8443575ae5adc2dbc569016254e965d134931fde93242 +size 641526 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5c662b5c8a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde5f56d53f2c3536e2f9ecd2fd20f84241bf980eb9bcbf0ccd2ba6b0cdbce51 +size 560759 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..786e676d47 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e67a7e88bcfcce2a9bfd46b58573a6259b8c3ce8264e4375eb9d845cd8bc48 +size 709492 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a5a0acc39b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b9abb110089f62ff5d920e9e3fd425c605937ddf13ecbb5e8ebeeed8ba795fe +size 613431 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..593c542451 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cd4c55d19f64c4616adbb157893af71f1236e9e6679d132f9aaf21ea49fe078 +size 759436 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..3b683c8c3d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3146629f346b1ddea5ef6c15287b0dbc26a21a07a5ff51e1f148c7a0236c4e80 +size 653708 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b2da4cecb5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b41dc9e092b636f8a01a0110ab0c2666384eb71d93d2e308d32699467a80308a +size 731662 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..460b7bdb95 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5365e37336458d46e21cabf2e095c0a31ea26973f8fdd736ba316dbec9019ef6 +size 627512 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 081940cdd5..12596b4edf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:007eb0fa09df6d9540d377eeee25ae0a16505515a8edd90603b253d61ae05d9b -size 803026 +oid sha256:ee751edf6b791560913e16f2739377324f24b50c32f42ae61bf1741bc0bb7585 +size 803124 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b631f8f6f1..6ede5f32d9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:55b7a2e1641ba2ee3f1e1f750a731b93a52324408e2e90865aafe72daa0ae181 -size 762718 +oid sha256:fbf4c1ab92f5a4538cf0c4580e1931cf246950324499a07705896d451247a35b +size 762818 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 563b4f4457..e6a9752a01 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c4bdd08f2be60ea2156c6b6c617f1f8ee77d2ae515602f83f874b28a0fd82069 -size 786246 +oid sha256:f8a09e2481e497be3bdb7bc9dd3ea064eac6baeaf5d1744779e6a74c4be480df +size 786346 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3294310682..63f13d874b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:96da79d960271aa272a3fd07cf208130c27f998cff7e02230434b7c070450ece -size 752402 +oid sha256:53878520257f7e8f2744614972a5107167c1d1cc01c8aee43908bf3f9cbf402b +size 752500 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..5655fb2e02 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbe03b75d4ad668097d3653e504c98ab9596c7ba0a5b40f20f5f6abd7778082e +size 753564 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..363d182ba2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5dc8ff10d800bb9de39fcbd907cc7bd2bf5333a9a821cc4fa99d243d5f34bb +size 671564 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 679abce9c4..ac8dac65a0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:44cf2b882a2f4ea479fd365da05c11a73a51569d8c6ca2938b9bf2e77318309f -size 775922 +oid sha256:9bf4611b13355467a3fa8c104083cac0be5e139a1196fd944ce2e01203d1dfc0 +size 776020 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c37a53892d..112f5aaf35 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28397ab6f02a0e2b2180f2b8948ae0dccf7317e2fb9fbcf4d767b6f617a5a7ea -size 700878 +oid sha256:aa7431a214073a36b3bcfdcf306821c9844651a3b046af4fb163525178fa560e +size 701766 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 138b0cc3d8..74df4e31fd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27daf8e8ccd57a2565d50b20ab717e9e374b411a71403ecb30a32a6379de9330 -size 724070 +oid sha256:a61feaee55789720bf561d4cea340b0f98ee38279ee11a60aba76d309e655fc2 +size 724170 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8a0af9b42b..fae47008ee 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a133f33bae65a71e223a22b3f9f6e6b9e704a9917cd9826d8cff27ab1a1af70 -size 663826 +oid sha256:022eb7fd780052a92cc0c951ed6e8caaf4406e00fbb6b5409db7c7ddd762413f +size 663926 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index eae728bb3e..1bbd3ff991 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:224ec1e7bb154e7c46c89c1c78872ce4c8e6021ed1198db2b3c1c3777dcd949d -size 776040 +oid sha256:e05c861c94e7e60ca451e6fd9dec902783f4f9ebf15af4a310141773dd574fbb +size 776140 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d91c1b4a29..9c4a434273 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0155bb18be63c95969449e9213f3cf66dc1b3f64ad67cecfe451db658d2c5542 -size 735684 +oid sha256:ffba83633f0536b18669e1d72970696157b2a200d376da666d95bcc45a774a20 +size 735782 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index bdae173db4..bb74875664 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c07cc88475e95ee2e0c927970dec12c7f8170bda2ed3fd78a60d62509a76f3b1 -size 759262 +oid sha256:387c6400df7586b495ac5ca087df7e8d112b3919a7898fa36e88732eb8727f3d +size 759360 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 037051cacb..4f4ff9ccba 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32ab366e849fecff876acc4c5bf907c3d84fec420f75f65790c51babb39c31c9 -size 725416 +oid sha256:8faafa273a28b494a36c1952581cc200e0f56f7306fecbb7ccf123600cca661f +size 725516 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..43c05a1838 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:463d519ce79e5044cbaf402b365e88455e4afea1119a3f26aee9e0de39821470 +size 725790 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4200b1b4eb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a00ea706c42db149d1e84113d883e5e8332a96310c58fe7a0f199a228efac3ef +size 643790 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 9e111da555..cc950e5782 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bc71035d3173a5bcde96e1057e368cb2b0fbf15fad77ade7d6ea51b504fca1c2 -size 748148 +oid sha256:7cb6f05422cb87ed51e87e6907043e3a183f9325ad1191c6e8794372f4223d89 +size 748246 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b6abce9bb2..5fed599bf4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7ed99ad663ab69f383b2c091b44b1ab5684bd656d017dbd9da2ddc51cfc87032 -size 673844 +oid sha256:e644392e175394dafbdd447e737260740fd32fc3e5ce75042145875d4c039d43 +size 673942 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 766117bf53..aced781e5f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dfd4ffcb430990146100b58e681609a666bf06dcf1351fb78112d5d5ead06583 -size 696346 +oid sha256:427ed016ad01763dd7529d365c9e9d651b1df7111f48950c04d6193a2b42cf34 +size 696444 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 15f67906d1..79163138b1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:725961291156fbe6642e2b3977657149abb043200e981441b31e88499397bb67 -size 636842 +oid sha256:6191d132dc76380288be6eb4cfb815651449b488eaea49c7f81abb89ff350ee1 +size 636940 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 39f5bf2bb9..b924f6fe53 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f88e8a4f032e028fe0fd4b3cdd975af56236dc0d239dfe21dc0755c4db2a609c -size 823582 +oid sha256:8304693649f034c63132673a0fa2034414e265b1ad2b6fe1063de11f5cdab545 +size 823682 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 802f22ad97..133d3a059f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:86ac8f99ab814157fdada6ba5900bd03e5c275438edbaacfbce5c09560e785fd -size 782240 +oid sha256:83feff23808c7433717c9340f6d5d69b4f984d0039ba57823ba794a5c82a6373 +size 782338 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8653ccd6df..cb2b8997f7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9bf3e12769f68ecd2d38e041c491095f36691215cff9654f96c9d40bd826f050 -size 806014 +oid sha256:db768b203f95bd425a2c8ceb43467ab86ce185d13ea15f6d0590095c1bf46949 +size 806112 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 460b6194f6..ce8b80b14d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:54ecd1c6a81dff1e1424fc225dd13a10e4f4a6ab092ce26c481562a9175636c4 -size 771182 +oid sha256:7b62b875d9752adabae7513fc4842a7193432e5820420afa8452d03b75588930 +size 771282 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6ea05d0bd9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f6cb628e4526704979dd6ab764c3f59813db705724b6a3dff275b5e0eba7ec8 +size 824440 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b11bb2ae7f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58062552fb3f183ea011e6ee5786761c6244a6a0dacb91102ef5a7a4476f2598 +size 721080 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 3ccea06294..661c32b59d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:95f4a540025eb2782f11360782a8539dfa1c1c104d8bb662ef8ac368c04b1662 -size 793518 +oid sha256:7eed1900cbb7c57af9a3c8abb003e70fe193d242d583b484f37e3523c74daca2 +size 794406 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1678b0385d..314f0c75dc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e8df3dce08ea3bff45c8df6f598918bed7cb01dcf42d36d0a4dbba30022e30fa -size 685520 +oid sha256:f63fc9b4094207cb025ce98bde999774ba9888d659f40e7ba28b7a4cf33c3618 +size 685618 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index e7f024b84a..57510a73da 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e1e6e77831658ef7eb24fc045be4f970af037caafe14542f2896fea964c85c00 -size 751978 +oid sha256:c49336b9c2353fc5e7312a28d64a5800d22ffbaf8783628200ebb2dbd9431b36 +size 752076 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ce847952ff..86e3e7a1b2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9af98212d249b837a6622c17ada169e22e2586ca122cdebd2ce2e6f3ac09e572 -size 647038 +oid sha256:b9c0e17f5d2c2178e2cf24ae27213bde600b9e9c04bb425a0ddbd221e656c37c +size 647926 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ccdeba9a5b..5cd557dc8c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e6b59fb3c86a4b02324f40a0da2b501fdeacf71d47d4e352deff0cfe51b950f4 -size 795808 +oid sha256:c4da9f1225ea58de94edd83d85279f6d73f78354c7c562a48d507253259c39f0 +size 796696 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f7786fd871..6a9ba74694 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:77bf68f2733fd4ea4dfbdc990f97f65b21aadc54b6303284c531800b5d62b7e3 -size 754464 +oid sha256:b7ce4ee151c0fa2898f741e9f8144724e1e27c40ea90c450a0c8cf5ed708a21d +size 754564 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d92b876d88..0dcec119dc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb9b300723d1275172324b7aebc79951951284c143e4200a039db64200b005a3 -size 779028 +oid sha256:2ad13c3f8efa5e3d9cd286d8318253fed025c2319a334a7739fab95bc9806b2b +size 779128 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4735faa0e6..bb55b98180 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:059b1767ab70adbbc9c20e8e32ceb539696479fa01e9fe582aa627b2b4186cf9 -size 743358 +oid sha256:6d375b372bce510b2443392eba69e2c7c41e2b97ca43c72ae5928b85044fce44 +size 744246 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..573a772a9c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9df144d5ec1625514676eaf30131a446b9d15b3f03b899eeed1d0716116060c +size 796666 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..deda3d4401 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c747f7b421518ee8182299c4b40d8848c94826ee45aa9a744969532d877823d +size 694094 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index df82498d5f..291c7fc995 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:54720c25fc766ad0be7748537c527998f4b483e051b7b252ef99970732106b11 -size 765546 +oid sha256:f4961066da60415686e116615e3b80b309971f3bc09fbe30832baf913e47e3ee +size 765646 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1401380118..7dc8f5e772 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b08f4a9012a08fc27e65c07f4ae2326baf61f3e005b429889ab2c5d7476746b -size 657696 +oid sha256:75f1ff5c7d725ebb6f77b3ddbb9a84d28531b5f7b4f06e4a8a8cfa67539beac3 +size 658584 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 35224d8787..0395c56fb2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:463b30f9cb621a7b7761756e9e377525eae3495860b271fd01356ca432bd4d76 -size 724204 +oid sha256:f9b46b8cca15b2395920fae692ba7d2c4e1b07f4d01d65dd39a48c38431f242c +size 724302 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 79d2f84f32..ee475c97e1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9e54f569d5661e93ecb53144fd20f9ed08ad9ba5ef9c9a9ea86ebd16e6bde7c2 -size 620052 +oid sha256:4fef1178c5cef36d7449e9586be3ad6af87b6e68ecb915cdc4d222ce8944ec01 +size 620152 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..1f0caae60c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58a61ecbab9cc6545e6f9d1c2c6d728e4961ddaf4f5f73e20d1e49d932ffb553 +size 643696 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1c64cd83a6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ab170f0d3f13918c8365c107c0c348f2ed62f63a29c655b3dde542dd7a9b838 +size 559969 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6e184669ad --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd66367296b04789cccd7622439b59e6185d751e33506b5cd868f0c0de54d500 +size 688022 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..c2b132dac7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea986a17cb1933f5b07bf9f7cbc3205d37e10d58359df5fac679e4ea71d62c9f +size 603653 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..edfd6aa754 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1b005879b026651e07e9e732e9db24bc997d1b1c63748637faedc7e9b01bd1c +size 640340 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4b3d575841 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ab4e142fc68da6726f542f641702b0489aff8e4927112f9a3d7b134888c18f9 +size 559671 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..4af4f1b813 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c7d204e5e3fe2224794d3e0d9937214cd4a7ddb3607818c4030bc9664e8cec3 +size 686146 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b95e5e84c7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b78423a3fd66fc445b57bcce55f38283184b2f29b07038caee19984dc3fa9b60 +size 602269 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..81247095ef --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31c302cc7efe9b0ecc8264cc1eb1a9d2d503f642e754857dee21617c00c4c2b6 +size 709984 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..67a29ee2f8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4ff6f4ba7370a882e7d5a40575cba5c032f23f9f237bc2971e821802894c11f +size 628082 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6460b61ad1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3b73c38f2e7f517a1922fae0c303849a57d78af5119395fa2e292753d6a0303 +size 754506 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e8dbf32528 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b61d0e56cf14320f6adaf124ee037ec1e9d6d7800bcee920b5e45c9df09a9b06 +size 672604 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..91ea05c53c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39ca54c47833884a1227a7114717c2861c8e3d588f76da8824449aee5e53c1a0 +size 733584 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5690c00259 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:229e7770691a266dac270ac8d6e0d9471789d5666b2244b53097476d96cb9a46 +size 639990 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..10f641cbda --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f05ad7eeaadc5ffe2a02f6e1c9afe84b0c5b7929da15e3e25a940cc5461dd28 +size 773518 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..3f48b9db6d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a10e4cd5330cf342b0de0712a519dba2314ff71a8aaba72393d601910d606c44 +size 684020 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f1bec5c4d8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cee48d08d8c60309567f48429c62968a18a2a179d012cfdc74a3baf6cb8df1b4 +size 726430 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..fc0fb4a745 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b18c7a87a42455137f3bdc0d9069ec220ab33f985fde77e7bc616ba7e2e3756 +size 632838 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a8258248fc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef2711c8710b7ce0eb39595b012bb161ef1c20c638c67fb8f0eed11a4486d3f1 +size 766366 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0dea433faf --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:479efa1256016df22d2a44a1941e8aabcb4d02b14995617e28eb08d0c5d58595 +size 676866 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 4da6134e43..37ad3febad 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb8f01183f1348371e2aa1e8364fd0139d2e878a2a93cdfe1460d40d47e15ee4 -size 710824 +oid sha256:8e781c88ff02b1c7a78ed4103bb948c5cb1832493f84738912a1af39d2a5e745 +size 710922 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index baee75afd3..38981d6542 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b68020c09dc62dac10c1aab74913df21862a453a813ed712bb92c3b40cfbcce -size 753818 +oid sha256:5be11668844a8a75f17669695c4e72b52f0480325781415455301348348aa254 +size 753916 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 3a3718c905..62b0d9ad80 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f99395f5cb7a65b41038139b77aca4d4d84a206c11a961f3e8cc8840a6f7b5c6 -size 695130 +oid sha256:59df2b667534ab95d65955fbbe98480bece062ae43e745611997ff45e31a3dbd +size 695228 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 94e3be9292..0cdced516c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9c2539a70b242e2bd21a11c87e2738be3767c919e12570342e11bda386fe6acf -size 737384 +oid sha256:9fe8ac44ec7e42be4001409a833f6061bf57fa46e1d79fbf09b36f34540e76e8 +size 737482 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index b0e55170ec..556ae0ac61 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b1546cd2f39ac1b484974c715d9c5e285c1e49c650aa96b241792b33f118d8d0 -size 684066 +oid sha256:53aaaa255604b60e23319001f43ed2fe60a55a3fe97374016cfa5a1990e868a8 +size 684954 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index aa5e1c9423..f02f81f5b9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a28d62736fe64b494ca58af890f579441b94de64e00385c5d41816b00c8a0fdc -size 600831 +oid sha256:6c542bba6ec8d490a50b9c104168daefab9172017e32b2fc10adb1d03490206f +size 601719 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 5667967283..f09f6bc4f6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:537cf6bac27d828e7f205d4629208f02baba653f67f3eefd0916400530e36f46 -size 729674 +oid sha256:b3e5bf41dc01b30046b5ca53455ff9f988a7fab66ec8a5b0f4e676dc18c13fa8 +size 729772 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 17a431dfdf..781f36c8a0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:527ca937377906c751ee1571192ed7df6f8ff8478202683c1fcc71cb23f44fd2 -size 643874 +oid sha256:e9152e1edd5ae125aa3f653ded142e798594066f8b273e3f9f2893745f2663ed +size 643974 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 46fb588a93..240e858cf7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:680b7ec730126490f8ec35727c066b52e82d5bae1df4ce474bd7a34260e710b5 -size 703670 +oid sha256:91f27e09c5016000697656bbc70d2634c1eda2bb6060d00ac86408124c582354 +size 703770 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index ad84dcfcc7..2ebd1d1b76 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e3b47637025c8abcd38944647ec82f9c2e1f45168ac4717142a17e158e321399 -size 746714 +oid sha256:8e72eeabe5e28cd6c8614ac9dcebc6f79811a18f0890e8410d828ae922409570 +size 747602 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 4d1076e54e..7df9648d07 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3cf01c1991a22c116986e8bb9e7f133c3b7969556fe3e0b26ccc321e887b5ac1 -size 687976 +oid sha256:cbecb87c1d2d60200da7c254982b1beb0076f2bf0629e6f15400b5e31f924971 +size 688076 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 44946b1f1e..3996ffac98 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28076f720b90d6c5c5b16acb1574ff610f5c390d0b1eb539c1b5acda7f19f8bd -size 730230 +oid sha256:82171bd838cfa1dbb574296ed7f9ad94081ffe50df7835b42b84467889a3a5bd +size 730328 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 54ec57e326..26da535d47 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1cd710e7623eab345e8a4a0977260ea430025a8e1a1aa75603528ef12f8a2853 -size 677702 +oid sha256:71b0bb53ad7684f3eae63392b9a4b609c720c5d7a5bb036640d1d30eb398e5bd +size 677800 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index c57773b62a..ff58f20a07 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de7504f406a80cd0211fcff6456e336ac4bac7f58de8429374208f0581a3ba96 -size 593677 +oid sha256:e82e78a049d9e89dea75f6328ce071b41673580406b582f2d699086c84bb5a34 +size 593777 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 56b65a6356..4f83fef76f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:328d3b147d4c52ba381a0240c11485aafe00ad7f290981d278d1d4cf18846ac6 -size 722520 +oid sha256:8f1b583b4487d1bc393a205a4e9c05dc885aa1015bba9e15188fb13e2cbda877 +size 722618 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp index 46ef015528..a60d9855fe 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:07f440a5c6b7aa4a13028e95a0af342c14ca7e356fd7be95ef8beaf6bf762cda -size 635932 +oid sha256:1b1e51e919b05359a6abe598054e0a4f85b5e180b1974e990d05a7d81f4940fc +size 636820 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 38b8caf746..80b2b3e205 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:37160026370172503c50d0da2a9a03a4ea9f971cfc33bc6535ef251f4a704d39 -size 618862 +oid sha256:bb9b26a1ea8a435f04178764de2787811c7c3e8bd849b1f98cc5f1826d14407f +size 618962 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4a19390efc..7c7e964026 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7e5f2cfd57be7f1d87861e26158839b8c81ab38e2c185f207909e29cffd1ad83 -size 593947 +oid sha256:13387cb2805829fb7b78bbf5187de1e2992cf3b5d1ca29344509332110cca681 +size 594045 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6b895b8ffc..a8288c7015 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6cc0700086cccb47063a5638f5b27b03f00484115e08f6c221e49225e278d8e9 -size 537041 +oid sha256:939d486e7ad475de63479feff32d0d8c4ec7339515ea199c34cef4b311d8b02c +size 537141 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index a3889f3756..11ee705571 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c8d39568a095c8c7b789d6b1e531f238498e70834981115b4ffffa2795f7f17 -size 514939 +oid sha256:bd5c4684fa8dc8bf7cad8d9e1b1b302cb1679548281943fd1a3ccdf86289a9cf +size 515037 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f16f0bb3be..7d6dfbc028 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:96c54b69ab79b14357a172d98c6db757dd8e1161a49f2981629a3db48f8bd7be -size 610519 +oid sha256:5762ee86002e0b7f42e9107f62bf70a7a70e3c3c355592f34292c9f44e47a179 +size 611407 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8ca6ca82f1..45dcd92f2d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c56132f0ae169a75606cc4a5b8d6014e2dbcb48656465e707cbe46acd78c0e14 -size 588415 +oid sha256:81d2741c6b8d7b543f39880b6735f5d672d6e503c48a9c3c658f928d565f58e8 +size 588513 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 812bd5a22e..2c588a39bf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:42c1875a2b4f18fa64740363c7b03c3e82f908b31817465f99c7585741e88d88 -size 527761 +oid sha256:a3e47069dca50a2233303f377c86be4231cba29e44153bb7a6e44f98a900ee90 +size 527859 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index a967366998..4b95d8b723 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:41ef3f0bdb324bfd7e7797baa79e153f09a9e28f7f834c00038766ba95e5b2ee -size 506299 +oid sha256:69883b5936e8dcd69baa5d311d1512c336db231a67c32c4b3b0b3304a794bf05 +size 507187 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..87abdc2a60 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b894c9fb2e02ac24c79943fbabdec0520a205c54a2a4edde8d5eb1815290312 +size 723814 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ac3ee91743 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b481fbb3d121b8507485411b3f980c8b984f956499ad9ec2685007a2f1568503 +size 640778 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 6d87ccb561..cb250131de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fc77b50991b4b6d0b8f597858b68442d9b2ed7ac07b9cf16d561ebf1ff77183e -size 614797 +oid sha256:5a2f71198c187f57723e7b074fa86a31b44dea365f268ea618cf20238b118be4 +size 614895 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 47ea4c48de..0f6f66378e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ca46146a2dcae2f5208ac3da7785f28e0006c12a1c773158ba24e9fbcc655bfc -size 532105 +oid sha256:bb85486739df9169f2fbec6882621a028e8beca2b632cf198a84bfc3bd3edcac +size 532205 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 93af593596..d6494cdea7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:48fd5b9174b5b87e8ed3f306b827c54ca75f44321421ceced463b6a6c9ac7be4 -size 588155 +oid sha256:c9fa7ab94a42f7b7ce6d8e1c66cba028decb15afad64e62575b33ecb7bb01758 +size 588253 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index bf18842f84..132de19909 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d7b0ee22996a8f0a4dafeab516ecd64b548da6128e16879aafa00b285b2f4638 -size 505563 +oid sha256:11b62a56d13d52b549bc0a87be49f1bef4f720dcc360a38704e624f53ad3cbf4 +size 505661 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..866539fd71 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68644bc339c113a0ecf67e1b2ef36d6ddff6d668c4436a221c7dd06c2484136 +size 768534 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..f103ee700a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46b7a997c6c9b27cf89ab56da82f6b41c5805baa51584540de8f8f3dea41b058 +size 683574 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index f8cccba90c..879bfae140 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:134b248fcfb9d8d6e64fa9752766858de22410ec586a5e895480b845ac6dc79a -size 554633 +oid sha256:a09eaadf43378293bda79bbead09904ab8b2f9a9a478e119ee9fe5c2e3ec0821 +size 554733 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 325164ac3e..752d476932 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:94d9bf31e9cc46ce2633ed3551b74ce55a072bfaa315104fed7daee18c87fa89 -size 453197 +oid sha256:0dea45da2ec4002cdfa8cb70b339e8a7221a6dd2e418356146f08f63b76eabcc +size 453295 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index c0072bf44e..3220a24165 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8d1c81121bd65cd94a2328d847f4b163f291c97598982aba9a47e0cbafc8b56d -size 528287 +oid sha256:7a5ec2fb50f6c82e8c6ec7d9335b01dbe0f688015201100c3876344a079468cb +size 529175 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3895702c40..084b159b8d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1d922152c5da5eec8a170b09b8e45179ade1c4e473912aba170e96d0379a9d22 -size 430699 +oid sha256:b19f6dfacda73359199dbe2fa05b99bbff1e3823f4557e02081e1fb8c6b2939d +size 430797 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4672f220ea..320dd11b5e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e7a5ce159237a77ad2ea0c269e46c409e30b004d726677ea675a2f6305ed7f1f -size 611709 +oid sha256:145a476941723e8aea047f2493220468597cac0f50dfee24c310b1acabee214d +size 611807 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1e48b2654f..b0580c0d69 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:39dd3500e52491cde809b125dc0d9163761546c47229ab3ad15ec80364d506c2 -size 586793 +oid sha256:f33345d5b851bed59e79f9434a9cbab2cfda7d926a83dec7e00a11f2d7c7341d +size 586891 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 17459ec0c5..008c353368 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:adae5c758ed4f1cacf75b259beb48577ddf57a6a163940140506a08906e8fe0b -size 533095 +oid sha256:be6a19b680f8b380eca6abcd4f1cf8b509f75d203cb6f122cea0bb614185aa90 +size 533983 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 321ecc9bb7..c8bb3b3cd6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:557ce31dfd0a0f15d7974d2485e86e3a6b6e03cb65746c5c8571c1b1b37a1e1e -size 511731 +oid sha256:9c9102909463463c13c572683233ccc3289d1dabada7cc23a68e821afea10bbd +size 511831 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7693a6bb13..b403fc5035 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:256e357d96f9f53f8e021df26e665e91b70bcb224ba1d006a2067fe926386cd9 -size 603365 +oid sha256:ae580d722f7929185fe55998506c46d513392ce88a0e5f86d9d4fffd84ec5634 +size 603463 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f9cbcfb13e..84da0dcb5d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1dd0b82ec2977b3de86fadf0b69eb314b5022a0528962022b32ca79eba1c2b19 -size 581261 +oid sha256:750ebd224b9e0fc8d228b5a4a80533ae912e22f97891315b07fd3567094b516b +size 581361 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3ae4e6e928..c13d267e39 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6c7995d6d2f19bbe623c12d74c6601e6989f78d712addfa82c1d5164d19fc538 -size 524555 +oid sha256:ac35237c9e252054320ce0eb477345f43258aa5183613593363a2232012c58f7 +size 524653 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index f6cc5c2e30..5f54e9d531 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f497ead1b186754ed91b1d3c65d74c755c23a17cfa220656865ad5a1cdfe5513 -size 503141 +oid sha256:134a0c2400104ecdb3f5f1b01398b5fb317b6a9e8044c2323dd91291ae5c15c0 +size 503241 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..0052942cfe --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4813d1c8e9849fb8a35569f24f0dff507749edce2acaab7f47e3274fa440a1d9 +size 716760 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..50d74af9ad --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:568ccd4d22eb3e7233cf12e52e8df332405c01a85ae7371c2928d0e2dcab1620 +size 633624 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index cfb5d2db3e..6b41716244 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c7d3c0f908548dca2cabc49643653debaae3af35f4d70af266b5adc144e1d115 -size 607643 +oid sha256:679ba0c3d313e1db78a988c9d6703b0a88a4a8bb23125a9f45227aa80393eda3 +size 607741 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1bc751222c..2e80c6f094 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a754f58ef3be2694c40670be084567ad9b11b6d4a4147b395fc1a9b6a73c1fbf -size 524163 +oid sha256:1c4dd2f56c1d626e3adcbd2a330d7cd79bb7f5e8acc7aac112dc91979cda8fb6 +size 525051 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index fab760e29e..9fb0a2f173 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f83c051cbde778850853d03045c0f18f9deeee5a9bab8d1966fbf08e2c65d4b0 -size 581099 +oid sha256:920327d7da4a6b315ac9725b9969ffd2139f7b9f444bb3c64adec859d7fe5f61 +size 581199 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index dea2719a27..19c74af2f8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:999425aa93f82b44747e5478df752dcd8548a5e13bbe83cfb9acebfe5e01cf2b -size 498409 +oid sha256:f1a0710bf0d9f979cad0a62dbf61bba4dc6667961fe27b630285a0eaf41784b6 +size 498507 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..1ce251b0c2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21ae2eef992fefb42cc38987f180faab3838b677126e09532c776884ad93ed7d +size 761480 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..944d2dcd43 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bbc721cdc8722153dc89101ee19ad282cae4c12b81fda21974deda4bdf6b8cd +size 676420 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index a11683de40..37f7a22d0f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6f7c26af6c0bdc8f569f9d2a11a1ecc05739388907f4989aea6f57971dc3e419 -size 550687 +oid sha256:72f2a9702d64d1582d8af31d63a8c63c7045c203b94ab8e4bfd3877d7e73d5d4 +size 550785 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 32a556a175..214c0f147d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:abed6ff323ef927e9ef4329ade16b11348428f531f73b03266a7081944731f9d -size 449249 +oid sha256:42d2a997c5f1a21c8e4ada435e913fcd2c0632488eb78cb5f550c417eb5e21c3 +size 450137 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 791b63fd9c..7dc0bd250d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bef531b5101fb3b5f55bff7d326a92318278adeb9114eaf32f60af9d291c57f0 -size 525081 +oid sha256:6a6378dd68d3b5bcc49ef688dcc732222553e5c4288736f6c9f50f55459cf164 +size 525179 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8cb62130c5..965d704a2d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d32a394c55c75722e465264198c4ab6d904c50d94386d4376b43d0c1505820f8 -size 426703 +oid sha256:1069682e48ba73db787143d059307d1e655388bb9a8fc76f532a526f689d2982 +size 427591 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 36f3979666..f37a4092c0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:77646ee52320dd5e340efe21f821d9aa5f632f52b1c5468ea5416e4174731cd9 -size 640258 +oid sha256:31de159d2f54b82e643e0fce74f7a7f68e48ab5eae53fd70031c5b083ebbef91 +size 640356 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 492ed767ba..18e7b27dcd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8448b67a7a5ea91a47a65698d272af9962ad8c1f503235b116495371ed1a3c45 -size 601577 +oid sha256:5577b09956a62b0b7c93dc84688bce0bf8ce292c9a869a2a0bba24b98f1f323c +size 601677 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index b203fd1c08..5ff57ec756 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:63f39a6e66f79317d5008ef44218131d78cd270d9fc605c6d5d8ec90d1e1572c -size 559325 +oid sha256:f2e096be97236218ea167fd320ae246d4f51b9cf98449a91acc92009d463a5f7 +size 559423 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6c8d92ac70..59a9497944 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eeadba77f2d23b424ecb0445cbf84f2b80789daf6ba8c567887bf0386616533e -size 534953 +oid sha256:cf10f0f97a2ed56d7ff54d77f5f7b9ad212cbd428ee1b5e9981630bbbcd2e660 +size 535841 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 81050ecd4d..2611925fae 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1444e1441179cd25f0a7ddb276bdd8d93eaa5dbbcbbc721dc8af406ff8b643f9 -size 630386 +oid sha256:22f367d07d415b478eb24609f0e502d147427a44200654c87024b68d5706ed91 +size 630484 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1e1769d11a..835a0af757 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:976275e90b0335fa4c8dec58b6b3a7c4f134882539281793c96992eca3e95029 -size 606603 +oid sha256:a278639371d9fd07d8ad1643a983d747744d803680e9af9ee3f05e0b23cc8b8a +size 606703 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index f151d4ce4f..bdb588f9a9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15b344274a07668c06e2606e139f96305e8d17238b7edb439bde4e4c07f17f9c -size 549255 +oid sha256:e084df96e56d11e220512b9f6960820bcd517cec32b9e535c0f2323b222447ab +size 549353 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6633a6f494..31e2932e7f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:702f3f948a6345f5b67a88b66f566e6c27aa582501356deba8af6ad5bfe9e972 -size 527595 +oid sha256:0ed3d344794a725e4113fb16d8fa35f2b31d748bd4dcbc3270097dab0d35af46 +size 527695 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..234c842769 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3601cca5a820e31c2415222e4575dcbc5e2fe8073939a3e652f229ebaf0e434d +size 803324 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..33745fcd7a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd0f59dcf2483ee1e156dc411c34899664fdfd3ea726f716eb72cebf135d8ad9 +size 710470 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index e2d69eb8d8..2f45fdd818 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d52a6dc521f796ef2e575c9f17681a000e07003caf8081328331f5fb3b2a09e1 -size 640436 +oid sha256:43e79de3f51c9246c8618daf76a324738b09336b9c0ce602151a7c37d38ecccc +size 640534 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5ffbeb438b..cd3597321e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c475e8cc6c976bf5d4d3bd86302df832171fdad7055daa2b676ac2add8f6ea62 -size 544423 +oid sha256:25af58ea69121e0bca02d424cc8b767f7fb6e49e47fb9a9a0e769c225e5a2983 +size 544523 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 732564c273..0347e60565 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:49dd4162edf1ae14cc199809e1b1c53ab9231a703ad430b9987a128824be7459 -size 610833 +oid sha256:45a5e4434bb4555662f5b8e2a0b8eed8a67ffbb5cece6995b40628ea08a7e6dd +size 610931 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8f17ca9f53..55cb71131a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2efd9a3cd054637637bde802708ddf598181452b724da38e5dfc8ebdca159195 -size 517239 +oid sha256:4f1692caf0120ab7d9a053f1872a28360b97a985bbb3ffa08c4692e4f432a7a8 +size 517337 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..786c39d73c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbae49ebd86cb14fdd6f591496929d5aa10fa7afffc0a0f3f1cccc50787ef7d3 +size 841778 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d30a1b0a58 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d3496e1f323293a98cd62cca7c12e4e308834639d1b22a26a59ea09fc2c861b +size 753366 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 8aaa26c53f..82354566f7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9dd3894a6e02ddd265416b54dfeac0f18c425423f421c9a30de9970db43380bb -size 575091 +oid sha256:9ea2172eb57eb4458233e8b75d459192dd9d9e5d9784a3cda92eeddb5c087fd8 +size 575979 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index b5be81d5f7..5f9ac52b11 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6dec77e484ebcee4423cf061ecd704f8d1123f9c57898c85ad047a4dd721bb04 -size 472963 +oid sha256:c78de2fc747f2239c618c44e3295d15eb6ba74881a8ef0deabbebf53a93952a1 +size 473063 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 3ebaac292e..9a4f25ecde 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4d94cf4e6b294b2a59c8609b96eabb4989a1b654fb0eaed4ea5fe761aede51c7 -size 548203 +oid sha256:e12d7c6ca313bfc8449f611dc7334b0fa28ac5f1ab928b6a875d846c963bb86b +size 548301 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index ab28795985..a00b89cf43 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:07bcb984cec88f92634d4c5b7b7b22de5e9ccf522606891bb14e17a2b3e3d1fe -size 448049 +oid sha256:df6bad756415b7c618d762c02f19ad27f6839b9ea3e9179feb40384820a4db1c +size 448147 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 58c2224db2..b8f2080067 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e5b2841c8909272d7e7045231ea9a2f6fb3e58b8ce3eb89aafa8eba7462d39a3 -size 633104 +oid sha256:8e19019eb52029b06b302ba22550f7e5623dbe20514932a08b14af3a8da076a6 +size 633204 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3ee252e383..a96fa5aa92 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ddd9d2ea6e46ff5862727a4da4c9ac888c44d6e141a426d5697b1b7e7b61b583 -size 595213 +oid sha256:a61ed16803f0f2efcbdcad7eab9769bfd3d10c7fd8ae8b62438e1b5a4b2e56cc +size 595313 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 57c0a615a9..98a97d0277 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3150db000ba0058c8a844ca25e019a261a386a665a9b29d1c51bb5b499cf0500 -size 555379 +oid sha256:f13542f0845dde3252e93efd4c0e4fa84117de093d1a4e7fe0f79ac9bc8207c4 +size 555477 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 66a00f9677..f927110f7b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ee78ed74b66a6d59fe5bec3bc61b90a0958d6d62b49ca61b50c32d81b1712c8 -size 531745 +oid sha256:41c97d32f55f464666cca094b063a92b9641811404a7405dbaa0370a00489004 +size 531845 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d99674237d..cfc93660b2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:23f9c63b7c938423da8ba7bffbd59fe19d57ca69b50d415d12f066fb76d5b06c -size 623232 +oid sha256:6cc29b6ee9b380e2b1a6c07459a4c72ccd90710d725314d3581fe90f04aba939 +size 623330 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5cfad5b0a9..27729872a9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cda2934a292d63c99158f336e78705446752d708878544aefa674f702582e747 -size 599451 +oid sha256:a44b977feeafc3baea6cd7f5c5afe11b6ebc1dd2b7b011713bb70eef6241165c +size 599549 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 15f9eb0a49..36ca12315e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7c61f7803f517abb6eb4d5cd6fec93c922e9d11d12eb19b0f25358e17ab940f8 -size 545259 +oid sha256:1b377a940855396648a3c2c131204f66336fe8c214030c0920a1ed1bd7bcb711 +size 545357 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index 10cb6c4b09..7715ecaaa0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e814b8b591633302ade5eb64271cbf19f4e1d9fc5ea7ba644aa6f2ca65e3d801 -size 524439 +oid sha256:63b2cce33e1697c0cf1d4f63608e103cd772f071d472592febec0b30f9115c6f +size 524537 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6ec5150490 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a30b11cdc6f38c534b4bc78b421a25eafbbf0e62f99202d72e369b6aa0946e8 +size 796170 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..2d45d09450 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:426be544c810d7ce45d0b85cad5fce631466811b90b3e2ed15b7db558477f7f3 +size 703366 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index f5c541f23e..2ecae86ec5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:24d89136c21a2d145113e7f1bbf39bd76cc48df1db31b3648d9070dd7b1389ac -size 633282 +oid sha256:5aa250df84e7f2608d35e28026a3bcb741cea8ccec397206e3fec3f46ee7358e +size 633380 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3680514746..ab7cb9a242 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:78cf8fd6932f127a52a99038bb705137b2bf370e5c71a70275aedaa5741e51d9 -size 537271 +oid sha256:954da0a5375d7b821c2178c7ef593c55991ee016088486ae5b2291e3a60af043 +size 537369 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index fc473159b5..4412274767 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c2620f297768ff36522496c59decf4ac5e18a9174fcc7935402c180d5ab0bdb -size 603679 +oid sha256:84ca064153fe251709c53ec798c17c213341527b16fbe446f1c90137156f0f02 +size 603777 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 24df74550a..8a608d816b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be8594a358e84745a95d454ab2a4878f9533bc694c9b956cf4297950d9bbbca5 -size 510085 +oid sha256:1ace63b0752e3c63a32b9a6b8c6fa13b84285a1e8cc45b636ee0278e14a14705 +size 510185 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..48b53c2da0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e04240a07c95a89f33121d50f2334d6f1d15534850fdcdf00a595b7a19dce7fa +size 834626 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..84dc2ab12a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d579c11ab783c0b8fd1ddcdb12824122dd5cda866cefd5cbe670aab7d9ca446 +size 746212 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp index eaaef421f4..8600cb3ea5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89b7c21fa6a6ac15e75bb49d436109874a22d95312731af5be481da1b26a95fa -size 571885 +oid sha256:0b8805dd347b4955b5eff1d48f7d67f743adb028bb1721925271485b23ffeada +size 571983 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5a52af3fc1..f0d504ebd4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d37990e1a7962b4dfd29ba6a0db283ac3f96304119d416505b8ba5418b260588 -size 469017 +oid sha256:59ed8a5cb6511aca1dd09220fb23db7475cdd0c7ab36ea11e5c01a754c842d28 +size 469115 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp index d52fd013f9..caaa53a479 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:81678c32c4979e1c6166b745e0b3743d1820a6a0a619a3152dfae8b52a6d2b61 -size 544009 +oid sha256:babdd716addb5b00f6653794b18204f0bef241052528006fe00e9786e55f2fda +size 544109 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp index cf7c70dbca..e2dccb215f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f0ff6c70c6b716501ab19f6f228796c9fd9075d6ff865929d36398615fa13dc6 -size 444101 +oid sha256:7f8d500cf760048f69b82f651f0b4f694a06ae6a5585ed82d5033efc5b357456 +size 444201 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 64def4bca4..dd285236fd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83648d87861944470710cf929f50445330a7d111b3770b32ae09fe2d83d53d89 -size 616029 +oid sha256:9e494e6524b1b6a0ff3f11f2982d1d619215bff3fcad4d02309bcc2b56647d76 +size 616819 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 7f64fcfce3..807cc0008e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f1bc126db189fe65b171f769e9f0f223c4e46dcfea441b3f9d2258d2e5f9d504 -size 547917 +oid sha256:2da7e0b9dcef262b6f85267c85b6de9081de07b0355bdca420844aa8a8bc8ed4 +size 548705 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 870aeedc04..3f9ff8a2ff 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a74618895d710d02bbbad839e5163db85b7f3fd6822b80ba5d57823cb2d0f6b5 -size 600439 +oid sha256:22d4d6cfb42c5d94f8f9258c24620dfd340d656baa581dc040780fee60aff7d4 +size 601229 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index f55b27c798..b95491aa61 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:59c90863fac8bba73357acc9c76b12e0984bbbff9df806b5282ab7b4cb337a71 -size 536865 +oid sha256:e1e938d6f3d3e610e6f414ad1f105e0ecbb214a094b2db1c4c7ffd61ba39f5d3 +size 537655 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d608130f6d..046a36e3d0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a5e37cdfd5ac72e8cdc0a0f8a336323e72f9e2e764e66759cebd0d8c06d91103 +oid sha256:0fb11b3d8929bab7a69248dc5f646e0341811af0088f8711fbe1de8e2d0de1e1 size 467613 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 94ee6f198e..c0861b31f5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:13ac25ed7e8eb10461dbfb07486f01f655e83016533e06551eba81a12041e625 +oid sha256:f53d65d3c6077a52af47430880f47a2ea965579a2df02665cb1f23d1bc29c628 size 437099 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a8d00dc728..323f0cb2cf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c913c80c552d70c065d5f0de4cbdfa9e81a72d93038bb54f21cca2537bdfa9d4 +oid sha256:4aab1bd4318fc2ea38b75224c2e29fd86d10e6eb7865c29c3c531375038d0ca6 size 456087 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index cbdbe65232..9a57976382 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d9894983ecab6466e8256f08f83866238fd5f6f08354db9d79271341fe4283c3 +oid sha256:664686383cb417a47244667c6546a0792ebff75691eb462e23abd133a38af9f9 size 432701 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 36a624750d..bcc65e8474 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5581743415969cd4ed3f00da1ed67d64681a9bce1b1dab280a91a9ac2921c9e3 +oid sha256:e1bb8de962e8e71c7aa79a46ee53872ba5025e045e87b8ae58ed245dc802f3d7 size 613363 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 61a3dc1ad2..17fc833665 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0e6a8e71bc3f358053f8868abb2de73cb19a8fb11fed6f0fa4ad093fcc7d798 +oid sha256:b4893f945e24541c6b62d867ac06ed3f90bd672b8b504e2d98f6f2f3e58dd830 size 548777 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 6bb6841645..84095c598b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d626f6b505e06c6cf20b61e940272fb668d23cc13ef91103daec98c396ead72e +oid sha256:a0e0626b058cd352a53e47c656c3ec09108303680b35a443c77b71b640d614dc size 434317 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5f02a532ca..6afda239a5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b393f020be82f06b65ee6de48ad3512d72dbfa08e9629d79aa0db09a1447422 +oid sha256:0bf030bcad782e8e705e04ebec1f0c8990011c8d476627fc447b1c218f678157 size 375553 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index e047d89317..87a12d4f5c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c717dcbe62b074f8c148a99717b195d94ad2eb3f18be5fc33dcb6607400075a4 +oid sha256:6b0b23a5585e4e3f6be0b44975452529ecccdd1adfea64535ff9928e15e9752b size 418035 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a61e45a698..3c1b59d608 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb416202ba2c012394dce73e41d1fae11c45a68c10c9ad502d4d8dcbdb1de55e +oid sha256:d7ec0abe33c9f051673a0e7bc137740672aa45410ab394ea7e56dc5c83fb1c27 size 359271 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e9a323ffb5..ff2a391b38 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4de20e1b8f6f3fa545111d3a99236f8e94e348ef6e1ea074c45ed8edb190ed43 +oid sha256:4de1b661a9c47679e02d42bad70cfb993cd492e1c634d18d2d63e585f8a298ac size 456563 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 393c4a163b..d1487002bd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:520ad0af098ffe86ac17bcfb1163c4edd9201c9b276a47897e0d1c517d5fcc95 +oid sha256:cc952a35e6c876ecf619f9b85e8b9b1d817545f42998c1f4e566f08b35731552 size 426047 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index aa8c23d812..1c6a8607b8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:95fe94914d24e90b70f28b10e059718e0b0f1142ec5495d060b6a40f7b82f0af +oid sha256:673af0ca66d0c3e41b5c3658a5154764f88f8b5d85158771c5daa803242bdd62 size 445037 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6c5e9c8640..346d900979 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c3af47d23be42e7d91189a5be7f6fbd11262a4d6f2967443cfe9b4ee34c76e95 +oid sha256:dde5a138f06c40b6fdc26aa9c01c02b9059cd8cb6cbd27ca502cdabb5e440d06 size 421651 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index affa45b2d5..900b8ac899 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d926fda7fd6148037774af4eb26b461160d48f060d1999d168ef9faac9084b84 +oid sha256:ecf56f7326bb4ae95229e2d4cfef6382881851a1b6f98e5d42c89001e900f11d size 600437 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index b882f3cf31..ccfcea84b4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:20f4e5f8816302c95d1106d573b3075b72616e7d51aadaf26c88a7f8a246b02c +oid sha256:dee50147c2332bdeb44c9b0bc08d3c4b4f6e4559b83e11934805c5d6ec07c611 size 537727 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 924c18055c..48fd7a196e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d76eed87a8f7ee3edd12b1545037074131de240af3bc00af4549762288e72229 +oid sha256:6fb92bcd927dbc510f59a2a2bf5c61083797d58dedc8b08e747c53425b7bd0a8 size 423267 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index eeb7e34709..cad3aed393 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f683414c79fb57bc6930119f66bd8dd16207da16ed23c00d4b545c24412ffc70 +oid sha256:82f61a003c03d9f8dce4cc9c94c8aa0b096d089c8823cad09f7f17b2c9dbcb44 size 364503 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 8e3e2cfa33..2accadc0a2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff40f5f1a1d91c1302e6b9169b5ed18981adeb554527e5466422ba977a41c159 +oid sha256:06e921938e929784329845ad74ce032c1d9a745ecc717c38b304ef68a1773fb5 size 406985 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e01b702471..b1c45a4d54 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5fd7bb23625a1f3dd186f167ce219f0fc67076ae3a3648d9336bcd61a326ede7 +oid sha256:aa933153b22884d4c91be72be6a58f71fe0ae40f39b4ab5f517725b84d70ce59 size 348221 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 16f56d7299..215563c172 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5c231e3742ba8fb03794b372acdea31c1df4fd39a08508818b4c285cb1d411c2 +oid sha256:ff5a068d304f07f46e487e9d378e7cc95ce992e7759eb6312400748ec493e48d size 487381 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e282651efc..4391620741 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:03aadfdd19cf0eaac184020c20162298e92ead756e3b856e12a2120e50cfaa8b -size 455287 +oid sha256:e266ff6f5e8a7c4fa9cd9e251d5ff911f52734178d50e5a9e43b9a988275faf7 +size 456077 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9c439e61ed..382afbe922 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:61b29d1ee070b6b79cef25562b969eeafcc17a10d2c1b93dba6e2f7e3f0fe33b +oid sha256:2c86d8d6ccccd3a44fb2f3848108643eb24070bbf49a52a0aba6ef55f13ac18f size 475855 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7e69a7cd8c..264278040e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c53e640dac9cf8c42fd07a1108a93663393a5214383697fba3a62b28fbacc889 -size 450891 +oid sha256:a70fc00a483b377170048b5df7045fe7f2dfdbeed3fcc41a1082085c8b13152c +size 451679 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index eb3b2475d5..d8d20dc78f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5ce3431cf2b8ce5943a29982556a5256c613fec8d4e5d6b689af53f549b1969b +oid sha256:98681baab7f4ac73ad4c1bce00f4f1e4f6a3dda4b5924dd608678946b17d3294 size 655628 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 43f6f4217e..4db5c2bcbb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e37477c0314d30243034e85b7c326a3924cdcbce9bb67a773ce15fce1b6b018b +oid sha256:d92555291321661bc232b0266f4d867c2d7f1643a52dcf26396cef18b64a2cf4 size 588155 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 3f2c0830ef..2cf6d86353 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a09a102c6801dd7486f675c0a5970f8d4bf8a7d48140aa0c5ba9706f65badaa9 +oid sha256:9426ed56110094c643d7e4034b0c57c230f9f26701cab7165d5a101785b8798a size 457933 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3a5f093cc9..c05566c963 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f7219ce91663c0ec4eaa3f1543ad9e64cea659db52fb6c34779b3529d535063d -size 388093 +oid sha256:744c461cd147998f4c8184128b2375d08181b260bdeff8ecb4e31bc81690ea42 +size 388883 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 46abe52c9e..a2413b61ba 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e7957b77ac1c28cd37f6c120f5bc2f2a475723fd91005439fa325af62b1d3c24 +oid sha256:2162fb870a783a043b3b098f8352edf020aebf78e322e19378b9454be9c0d660 size 436915 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 92251819ba..84655353fb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aada9f3174415d42d3f13b35cc841feda2a5403c92026dca631d8af0baca018e +oid sha256:2254b58b7a420c2e4619e37289bc8576ec7d3f55e3d5f34c8994198042b23bc6 size 370233 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 58d307f124..021a73722b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1452361ade97326648ccc9e77b452518f7c87fbdef6263e84c9753b81553db4 +oid sha256:a430947312d29fe797c496639649fb7423ceeb2dbe39886f0a1a386af388241f size 476329 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c0197fd25c..70ccd2078c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43ebc4d0a57c777e590aa347d48dfabd3e70e91283260f8bc2f59c6ac89e8eac +oid sha256:5402f6bc551bd0773f46c1e38e8b8130d2242cad5b0719dcb03dac4077c82d04 size 445025 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a8a0e8f529..cd84a76ea0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c3be0b5ca8ec8c05b01695046e3c4bc71ae0993696839a06d588605b749d471c -size 464805 +oid sha256:1cd16a11b93e93596f9abfd72c2194310fe3cfceb33f6623ef4640c6e6eddf5c +size 465593 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 89fbe04f0c..35e76461c9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7913a1c95c2d4a967c04a5f46bf9cd0c467088030aea65ea1dfc0066ea502ef9 +oid sha256:246cc71b82dad03b7077b61bc1d294f7b2a6bd96bef1f8a8b34f41c3229d315e size 440629 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index d9b015b2a3..45678b68af 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0943f8f4c5142c17d32c502fb3e8ddc4511a7d41ad040238294ae52bd609d23c +oid sha256:e066dac596715ba3c5ffff5bd3cb3dd2641a7f2fa631e26f14c5b9ce0019b482 size 640038 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 7e549247c0..6c38cea335 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb262428b5db2a2376d4a4a376b4e750372644ed6b3af4469e9c72e194232795 +oid sha256:13c8520ccffbb011ebc3fea15e55e4f63997795282deba64fb92e0e5aeb9c04a size 577105 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index e1f1c082b2..7b51a8e8de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25df1f4b8a1b372c032ae967a6cd2bba567a4f9411bf3b2a3ab0903d0607c108 +oid sha256:0c9d632b3c2691b87e3ac7010652941b276509fc0c64a4d8c1948fdcbdcb3eb9 size 444515 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d3b3cd3b1c..faa45771d7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a451701fc535081246f1f9121993fe14d8dd3c1b7fe5a0ded8f1d849aa29bcf1 -size 377043 +oid sha256:5af36449bdc724a5ac63cd9af3acc02354d32dd4ffeefc59eb5a6b1c7f9ce922 +size 377833 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 4cfef5b7e3..067866049c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a7bc3a859640f8bdd737cce45e256e10b52c87bddce9fb6ce9b2be7974dfe2c0 +oid sha256:cbcba5094e502e06104c6874995956bc896ab769b41f78c20d6b23b8112f77df size 424285 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 104f86aeb2..dcf9aaff4d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:364d781693619936cd389d75a123aa87750c719241f9ebe09081e235c7fa7760 +oid sha256:2962d92b11680f2de31fbfe30e3999e6ea164b7fad4bf4a7b230ce8df4c748ee size 359183 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 26e5ef238e..d0e5f8378d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f0893fc59d67d2785ff7773a6bb1b6a9e0a5bcec85b24cd2a7c15b51cbf8323 +oid sha256:a77dd8c18fb7ea2707010708a833cee21fedb311edba4126937829e068230252 size 646764 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 55c178daa0..a024446151 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5d633960c8819341dfe383e22d7f5b9594778b3451ceca5581377ebe7f612b3b +oid sha256:201cb3ef65a0e939ad81d4efac6fab4f385e36915f7a8e5a38a6833e4db60b20 size 556599 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index bacd4c5ba1..c91f17470d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b329b88e042341d02307e1bd769682933c493cc99eee89fa70c4353d905fbf47 +oid sha256:e19883f62730c6f8a3ffa97a8786fd5d3ebf17192c2ea26d22abb45e94f0dec4 size 623084 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 6e05a08cb7..40c6533162 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef157ab59583569079df0afb451dede4ff4c7f21095642178068d3ac9f657521 +oid sha256:c6844015f71c4286d67eabcd3b3d5f11b0afceaf8dcf782aeb47caa259014f6a size 535287 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3216418e0b..573c752182 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d16cd906035a8732757d52292fc5a6e33ad2b1b5e99757b0772f72fadf9909a1 +oid sha256:5febcc87fd807174ae863028fb8f49b79211910c4c2d19788300b4aca0197d87 size 580487 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 116b5f2343..71a26638ea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:897489d7747e2b7d30c03dc9a2aac275a1fcd50859eb27893641d40cc05190e7 +oid sha256:df9a3229b4eb513f16f7ef336b51791cb7e7b06e25a77ee1d396edf8e8d1c9cd size 555499 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a0cbd1c310..46a74ae210 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:120854c501782f2de763440d2962bd8c07ef58ebf858ba65544c52031b8cbf2a +oid sha256:1fd8061fe910439f7697d27a6338fc2713f0f23573d6b6d4e0a379522e9e54f3 size 566595 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index fde7f1ea66..56bdf95882 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7937d94ca3c5fe76a3c5e37bb602fe605adb47349d31589d2fb8875d38e3739a +oid sha256:b98202c1c70e4c6df456e3b92dcd7e6ee30784ab2c5df315115f635aeb317b69 size 546365 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 335792d7ba..6bb8d9bec2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1e6d484def8201133b8a04b7c96f4381e7887ee7d1f137122dc53fc7394fde80 +oid sha256:f31ea19c5da73961195101e19829c8158c67a90fc687f4c8d4c5907134b2f1ee size 649920 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index eed0d63def..b4f130a231 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:687191d797a549fe08cc49f3a912b267327789011d1b927e4d951397d5d767c8 +oid sha256:5e910f7a9e353ddf04ace4fc3ec1d34307aa1bce3bf45e0b9b9a47238de89086 size 593055 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index ca249cb618..3b9eb2b91b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0b59b4d6d9c1efa7532656541add170fdda991f6d2137ab8256fdc3d3745f65b +oid sha256:b9a69139b37968e70dbb3e0ac5f4e7bc8b538988f72b1209c418956b01727cd7 size 542457 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 200f1d7c87..5d4c1c7d76 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b9e62ff72e3fbdb898c740d9fef6a12faa011e7f0ca43f70491e7ef49a0834ae +oid sha256:58914666918c959afea8dc46c419e0d0847fb44de1f2c6827caae3d450ae338b size 484481 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index b06b1c7aeb..0861fa0de6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:faabd66df1ef0abec36ae3ac341043fef137c320de086d6958fbe0b0bdd4a001 +oid sha256:96d2624e4478440619c9b9045a611a2dd7a045b3711312c7a61af3f89ba77e4b size 523807 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 35c3d83305..0b27f0a18d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bc664ccfe72b2bd869c27ac425960847c56070f775680ee4040655f6851e1a7a +oid sha256:929623598ae7bdf7fdc1db87b1493651e8a0b8e3c06e4c92d996ea55e8dfa884 size 467411 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index fa61be5e9f..1f85df04e2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c9d315ac06627b9abc52299108c5a8fefe68ba3d8fd0b3d2e9b7b93ede24e3fc +oid sha256:98d44d1682bd2f00b2b7fb2414a1b27cd8d4050dc3b748c778d0bfab044c06ee size 559175 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4a114fb43e..64980b4dad 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:36bde849e7cc2a0cc58e35cfa788f3f9064d9289dc7c3fc2810e5cd787a21d97 -size 534187 +oid sha256:6c074e011e9e6edc4f367cc8816c372dcface5d357672e30d58e9324de67b84c +size 534975 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f83a213455..009573510a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:283ffa17aca4358d0a8bbb17134a0c916deb57c661acb83012fdc0e815c373b8 +oid sha256:e69cb57009c7da1ec6457deb20766de76d798b05a28ec68b594221f349c19cf5 size 546071 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ddd9dc67f5..427f398ba7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:766c93fa860388ae91a8b488ea33b0bd119686e974d3c15a059870a84bdce121 +oid sha256:4c47996dde8a945c5ee4a2f0986eaac5e19695c9b1c5319db40c3d62eeeeecf0 size 525843 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 84c1fb74b3..372ae9b764 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3028ac396fb2a93e01c18b01bbecfde728bd5f5856ad5a6fa4672fe1a6915a2d -size 626240 +oid sha256:87bf19c9dec752ed88b7b4320ab0aa5983abd72d7d64319429ac378f517b3a1c +size 627030 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 93be55a139..09d7cdcff2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:775f69a26c2f1a8bbb44df887651069f1c7fd3247a3671fec3c60feead87f8ea +oid sha256:315217fce0f86f9e8f5098c219a7087e24fea2d790f94619cdc2f7e3a5eb9105 size 571743 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 169ce2df39..8baf4e1818 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1c820e1f93fa5a3e7bb4dd9471eb4fa3c0f02bc9e23e90c8c86785726ea397d0 +oid sha256:a307db5622527af235a3ed762a80427bd8e35b7f4b4bf3ab8b4cf9774f286977 size 521145 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 30f0c6fde1..9c26f40640 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ab1fe6f643b7784cd69beb85de15bd935b36d08b10964e3a2be3bc415c7697c -size 463169 +oid sha256:fe77307701ecbeb7f0fe4b7544c9611b4ec7958dc2fd92576b29e251884cecc3 +size 463959 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 28fabb911d..c498578c22 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f3952bb9779e8df4878f394747f6cd23f7b5be0424385c4c69a698c1e02e4cf7 -size 501705 +oid sha256:c3f7dd797fb4c99de4da90ff5232779f1d319542bf4b59794f81a8d6fb4e2cb0 +size 502495 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index fdbb49ea77..581983228a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3fc86f48b78f10e72eb2218238c9814978039a9dafd874477f3e39be7d69ab57 +oid sha256:561c1a5e197662efcf37af17003a04c3daaf7d2a131f944cb569b57591c62708 size 446099 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7906b4807d..1c8275618a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eddc5fbdc2071c09450bfa8f1a72bcbfe8cfa13e901e4261bcbf33653288804a +oid sha256:60f0db2ff212631cbd5b7a2cb939cdc677e9f16d3b077a866cbc9ecac250ef7b size 600255 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index be10df6c7e..28cbf5f012 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bfeb10074be55678b499eed1c668cd9aee0c15b1b85fce5bb89606a90259824c +oid sha256:4093555b6bd26978b19a6eb16acf14503cbe9bb3bb4103a12ec6e4002266fb45 size 574477 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 55f22dc4f4..8a87b2935b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:24c9219ee28e0a9991191739658692a60ea2814e8f4f00b9f11ca7e2c5b171c3 +oid sha256:a59ba97fcae8a05fd6fb3548e756f06f63b8682f8be277874fdcbb2ac2c41af5 size 587151 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9b855f786e..8870e11f7e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c724e3bd12a3303ff04e017e601f4f03b240a941db9fd62cef52a55d8f325b39 +oid sha256:b501fad93469f0747abe6a79eb8171d6fe07a68e578b4a8dca67e7f7481111a0 size 566133 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 4d93390575..a0c7934804 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7030ac21de66c80a0d052aaf5de7b36b9167b9be07e7e13175b766de27d01e25 +oid sha256:659d29af4b4383043e318f169398870e73a5ed9365f17cf885c159d3716b6b67 size 687152 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index bb78e42236..e623dcc6fa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3117718b37411b1ff6e49f34ec14c8470d33871e6fb2bf336556f38ffde1a8e3 +oid sha256:71f3261e104fbd138624657cd892de1c602c3dfd4291c84d80d74f2a7ec492c0 size 594617 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index c380847e3c..70ebdce401 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cdd3258e222f9efcdc2778cd318eac7c9a88f3cc17542410b9d27707d407b633 +oid sha256:9169abc812e1b2f533fe01981be4cf17e5196621037d69db5b5f29fd1516dcbd size 565283 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e2c4b730ad..7a98ae0824 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7b76ac64d8f340d258ce196ceb6a395b0e356b80d1f6a0b30dba2393b4c27093 +oid sha256:a29f26aa182871a789d73e74dc254273e2406d3231cf274c700d015f1610b03e size 492285 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index ecee1ef6da..f7a48ad82d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:739010613908dae78a566dc75377024834de2dd609670443214aec2d88370e69 +oid sha256:ae6236443182fcb3470ac787538d175ea8f01176b6bfe35185097fdf0070b6f0 size 543475 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5c73ff5c29..ba31d88a28 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:87b72d689a89a865d23289b8c213bd98bff521aa6b92019186f19f631f4b7a54 +oid sha256:d8e4f1881c26e99d4e067be622b741315172a9438a2c44f7799a773432d24e78 size 473635 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 375d1366dd..62846ded0e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3d235b205b4403e29cea6f6150fe6576bb78523a4b481c9147313ce7abaeb1d0 +oid sha256:7684caf09460ff4c1e408579a5fe1a3280dac82a1d462fd41ddc660963ba08b4 size 579733 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ef25a9ed36..396c54f575 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e9f5ce3063aa8a1d2d3707c9a71e61c7be618b761c7c8d2d48394bab7c332a7a +oid sha256:acdca8ee973302efdcdf642587e87a2b3d4becb11d4baa071c85149dd6213bf4 size 553165 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 900901b18f..8ad20df16a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b60b7cd8f97bb1b8e96132dd87f471af0d1972ccf8b2cdb2ee92a671813d1bd -size 565839 +oid sha256:f17fdab66462356e2f7be7b4ed59bc0a44b70d49b25c09727ec68a47ca031d54 +size 566629 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 521e75f43a..5834c1c6de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:459a121e138f7fcdb6c16629fb0535facfa77a26a68f4d2cdd36014f445de1ac +oid sha256:14ff04de1d98f1a2f031c81b21ef8e6cd59b26132943f6b2c52f036eeea3515a size 544821 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 3124e54584..7a9a306ee0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ca5bf432d33f08a3fc671f89c120165ec56a4273c1355c2be894f2e4564dc89 +oid sha256:67b1f52cfc5e258c1fe5045135dfefa491b68c3071f97b2d1ef1fa4bf54f35d8 size 663472 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 935637dd08..a2215c8bf6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ad311b81d7b42d27914529534086cc46522e702f822aee5c758467c854c9077 +oid sha256:1aec7bb993351e48a84a817bc8b41aa99af7c90acd3438aeee8281f478a15f80 size 574095 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index d2225e600a..127fd593e1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f9c3309ebcae9a6410c3bcc2f04456cea3356559d89ecf93c7ddc9c3f06d48d9 +oid sha256:30ede7a1823367671dca2da4f86ecc891efc0eb1bea5d128eb71f5e39c46a17c size 541603 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3a50440ab5..8ec53f2f36 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:66c54efc0c241bbe91464371ee4b193ca543ecf130cac8087d9125e6ea065a1f +oid sha256:4c46dbb5bdd02fb551581ff7ec34845dc8ca76c494a50687c852490ba85ba097 size 471763 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 87d70ce1a9..71fc4cb601 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:13a19e2f55f1d5226cb71c4866dd72c9dfb3b64d1b2378575394d7c032cf9eff +oid sha256:aa606a97c17a1724ce40cc54e87e52f2c76bef7e9f80bee04279241dc933bbe8 size 519795 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3d945fc23c..6e0477b516 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:81d188229eb3b6ff83cfda2c02613ed65e0521ae010b648acf9dfff2400ac138 +oid sha256:c2f0184d050ea5238ab4ef0a11291b75c17216adedfbf27afee0b8482bc76979 size 452323 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 2c786ae5d6..495d7cb20e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:632604076dfadeef9157dad11d1887f4366b731bc2500540ed3d4c9f18162c9f +oid sha256:00fecd87fbcd418f805b87701d31f80b1e049e0a5861f7d6e1ad5b169bab8a3c size 545207 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index f41d9898e1..56560cb31e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15c328dc169d1fd20af2c0d6858eb00391e3d41b973fbd8aeb8ff73ca6567db7 +oid sha256:12a0cb88369b84ce579d73b03d0ae5430b93dae88de10370aa9407fb0db1ef43 size 486467 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 1e2bc6df23..eee30d0851 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:34a14b867e21fad295781d188b54df5fdd1cb5932dc478a854cae6ee3e718627 +oid sha256:ac55a6272361b7f4beb2bae94a43aa4dd76723e712fa1a1b13c60012c4489ce0 size 531789 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index fb6e2a7b52..565c8013de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9059f06810b651f36f7f120b2e1ca89589bb98cce3fdf873b9c44a5459439e96 +oid sha256:19491d3f9d93759714d1866c68bab7b2b3990890d614bffb8292481e3c0316de size 476207 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7f0ddf0816..7304072e83 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:654055b3fbe4a52762df1e72ca26672a77d5bd0fb64d953ee4e69622b5a818e4 +oid sha256:488961b519df836bd7333ecddf871c2ffba3e0135e353e305dc8fe999c084490 size 440205 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e90f988c22..24202973c1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2857ad66f71b4d8e6b27e223157deac80a41987db2a544498fba895ffe094898 +oid sha256:ac0e4528a26698c8ebabcc43f403c0b1baa55c3eade4ec84416ff6b7b4853873 size 426043 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 815480f962..6ac1088375 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6a21c16a08baadb048a6b640e5c1e5e78951d00cd11e97156cb166009fcd9a89 +oid sha256:749d7fc30247c4339553c826928118f03d11d317d0c042567ce6ce79f82c00d2 size 434969 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 72f7226b32..2dd53e0573 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2291718fa3482dd5b20b59ef98ff1d5f7a3a84f9e967625bf0b708ddfa299d64 +oid sha256:96884890bb3436c0abef0d636cb9d0404e5f519236ef33419d38da99a9416e9b size 421647 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 15ec012b53..20c4261c64 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f05997bd3c3bfb8b64f90f85a0d5ff188ed10b3432148db0af05fa6af239490 +oid sha256:cd6cb1a6a454a7cda4660821b847845a9ac95ff9ea1f36090ce5612e683b9852 size 542837 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 52b227a21e..aae466d1c6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a1348c656401df1c817836eb9451c71442acd8c3609fa678d4d64c2d58721658 +oid sha256:2b0c334ea8e76781f461365ab5764b1b2372064f21dac55c8746ceb14ded23a0 size 488119 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index e6a0083d39..12314a5f02 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a0bd583141405202c281b6e61ce352ec1c702a00843fb51af0d502c74b1a79f7 +oid sha256:f9decf35c70b4474050ef2264e792be25bf132843217ad85a47c90fbc6deeb9b size 421881 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 28e153fd5f..a2c7f9ef1e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a2676f7b0b1dd1fea067d67fff419ff5e5b82c2b40a34fce3415268534639de +oid sha256:35ae4ed306627cfe6aea9ce7a14227c6fba933bc7852f48cf05a487d73921560 size 364697 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 4c7b5f6a13..287b9b0764 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd8e966e14fe3d85d88cdd9ebd09db4e7a706b1ea485bc74a5bfa85b57b8f95c +oid sha256:464e7cbd95a0d9a4a750a8f5c6425c02d11775cb5123ea19553dd15b9c1c8e37 size 405403 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b1c0b73cae..856a00168d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:da1ecaa6f8feaca56348a062f6a1648bcd1f159a04f9488a6fa94e22241e2723 +oid sha256:c18388dd38a59a59931e3a3282f37680dcf61d321e02f06311da1843edb12f6d size 348217 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b3f1de33c9..eaa3bb4534 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:938067d740dcd9c0d6f8a3936080d004f636c6c897d886a9f7feea9d46b78a50 -size 429153 +oid sha256:0ab9d6e746c766d5dcad10d875ec8e954b1129ff1086df1e000078e6e4e5a9b7 +size 429943 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index abd2f3e790..292caefffe 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4f45d5c59b62cdfd6a8eb8450d3db0a00a3aa4280424ab88ef1cc66e8d4c95e3 +oid sha256:481907ef9dcfa215b5e85bfc8dd62eea78221b5c81546582208ee3061eb7c040 size 414993 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index eebc7ea507..a2400b1e88 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c25dfa078ed39c98a30ac6a866a8726edceaa975cc72b3809f0a060dfba8af7c -size 423919 +oid sha256:c28bb6aaa3307fdc4a75ffbf20909bf15f46bd69898ded8076f58b8ffd9bbb0f +size 424707 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2513b99c80..bdaf754999 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:223fa38c9f0152176b9efe7d06f5ce1680cc1a07519a494642b3db0e8ca8d1be +oid sha256:138993b6f89aaeb844f7d8da0cabc87aaa2f41dc2113809566ce9c0b9b6606e1 size 410597 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index f9222471df..90efa03958 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5d8d8cacd85173f563aad190db7265b6ffa2b5e21a119048e475b47c7a386f9f +oid sha256:98cb4528b41f4c244198fed89e00137cb5b6020001b3107cea7d025b45a2d2b2 size 530997 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 4377238e6a..dcb8629a5d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cca4fd59833a196454c0f7aeface52749fbf6131f7643760ef9caf60bbb5f7c6 +oid sha256:47bffdc0c89388417d3796ebfb8128e638db11e38f505e9b698fe226f16b13ea size 477067 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 935b3f0d84..90192f2742 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ec9addc44a68e8c49670641501600f67865f57946896e84ce0e7a3ef07f684d8 +oid sha256:e6652e6219a5c14b65efe39710e427385e2dc39e4c6d762193a66a7f6ecfcaf3 size 410831 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 41e520de57..adf52a705e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c9cb5b91b2ecc9969677295eabc1a0794b797bc166ce122ca04b4728ca6f12f +oid sha256:b47ce036e1d88dc826e65904b3d86c4f9fd9292adebc293345a74fe22a44a871 size 353645 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index ad20e10525..d5d7197939 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c339264c28882369c9e6000762d07ec520dd4943bc7f1fda88bcaef40f08bba1 +oid sha256:fb96cfcacffe5019f0798053301d12394681844b2847bdfa1ca5899ea6a96fbe size 393563 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index fab03e4cf1..530736039c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c8eac4bf67ea53d8ef40e0174f2ee116ae9bf51b04516f0d91d2de4e10611279 -size 337167 +oid sha256:52e6a4ad687ee221bb1e568151a5ef49dcbc4ffd0397d55cd665babce6b525cf +size 337955 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2373e3fb12..f423c6431c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a9b20be12545b9ae59a6097d01c7281cb4b891f11211ef344fa43952672f9007 +oid sha256:7d3ea5267e5c9b2e1458e29b27db7a94853cf9dfcc5acf4ed8cbaec3ea3734a5 size 460761 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3ca818f6f2..884719bfe4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c712280ca8f6db9a1dfb99692ac77d751974c3b72d9d7335229dec6153be3aba +oid sha256:612f2bc07ed4e0da6bc85d88cf651e6e32e225e75ad3a007a88a3f4ed5903276 size 445811 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4aa7c4b9ba..9bdb938d98 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:039f0b82d64aa6a958e4e5d09e663cf50bec83e4bb13d1baa95a40f3c5951875 -size 454737 +oid sha256:695a9346f55c6b1e051cc8591e9f221ddbc282c30ebb63653bbc449d7eb0199d +size 455525 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3dabab1964..fd184021d6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cc63eafdd77756f4fab66524e009f052f3d937cd8442e80a2be8084a016f502f +oid sha256:5a8cbadad2ddb9e5475ca5bacd167c84a1b241e29b3c164fb959dd9b708483a7 size 439835 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index feac58be3f..ff7c51bae6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:73aa3fc855cdef0d47b91cd30145e8bcf28037c3a5fe02ca58983e0f15f389ce +oid sha256:7d2ab805892d4102d0bf208946b7111d9d374633926b6a1b09c03277afbec011 size 584805 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 765dca26af..67a72af7a5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d720cbbb8963ebe5748bda4cf5e7fbf89ad2c46bc0551a6e2b13ce107f6ca8f +oid sha256:e27449b603634a08ee4a087652b8d1cefa43e3acd8fd54924c07ecf130edff35 size 524487 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 58c2722a21..cd9f62584a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:93bc122715790ceda9884f5cc1650cf3908025501c23506c128722b0f3cc2168 +oid sha256:f1bff56b64a8903de1e04d640f47d5ab20c38fdc45e60d8d8b22b259d20f24bc size 447075 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index bc9bf2a6dd..2a2dd10bac 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79046ec4bbd1db7951ebfa112983cbac24770bee3e0982afef06acfdf57307ca +oid sha256:95a30abd1c2d50b9848c0044b25b31a054d6c8653f44ed985c7e222cdcebf348 size 376447 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 1619d57de5..da7990d888 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae1bbaf4ad9d1e55e558cc0e04bc46d86b68625db88882eb20c2649e8515be8b +oid sha256:26ca00080d25a35c974cd4601271ccb71e4aaf3c06acf0eb33483488d19ea235 size 425861 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 25cf217a75..b2f37b3358 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6326e205a028116a82fc1742c6e6d2e292846489c5dbcf9673baa1579ee08613 +oid sha256:7165e08af13059b44781b2d59d108fa2b0e9ff710aef347761c6fc9915ee19a4 size 358389 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 489f9c7f74..179fe93ac3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3fb3ddfc0e24c444e504498cfe26b4ad8c03cc6fb21b61402e399071a0f9691f +oid sha256:1e2f3ee63e905a224638c628d1a542b13b024ab419cc5c313524a48fbb8c1637 size 449711 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 15cdd76b69..d1706782c4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25c3a6029ee8c4e1e2503c2e9801192a55a0b6d303eab1056b0e248e55a157f3 +oid sha256:c6c4bc3711dc73c27c16f20efa1da73ec45fc246867a637f639d3b3d5abc7d05 size 434761 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 14db9b58a9..551eb21bd5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed794309218edf294ff81134b6550d14bdb0fc97e406b3528d8d64b315e91d0f +oid sha256:73b98762e0cd0e369b2e199cda843117b8d09e6416c07617d06f74eec5c458ea size 444475 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 528b50c7eb..ea92b0a2db 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9241bf8abe50afe5b8724e867b94481db1c9d8ef1d2902b714b108f707fe98ab +oid sha256:00d4c28492b6aed73d5d3ebafa438ddd353168112b3674e19f203d8ce1b91207 size 428785 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 1d66932cf7..4e77e8a9cc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b5c350e21f08c30e10188a5a27b3da8bfaf966c93691df10b31bd039d3b8b57a +oid sha256:142e70570e5e01c66d669c2bbd0878c378ce30109b61569f8e0b1d4285df404a size 570597 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 49e86b149a..73e9451240 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd9deeb5bd095bf08206e090a080d9d68aea67eb58a24ec709a226114a127228 +oid sha256:81f0e0bdcefa88fcf16112b1adf23c4c097ab76c1a2d0b25659a64f23ce16994 size 514225 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 3ed4689459..1527d65d33 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5ff0a84a208be01d886c463b4dab73e908fc980f2edb17ac1f817592a1458682 +oid sha256:37708effa0a2e1ca8d5249d8b87962e9c0398f7230b74ccc894b3e238a7ec696 size 433657 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2bc0dd8578..7d424f5ab7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:119b4b447dff299cd0cffb1f35eae8668b015dd9f53638778b20613e42a5b9b4 +oid sha256:ec34c53680813cb2226b2f3c0084fc4a1e3896e77d3386100cb462af1085d940 size 365397 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index dc91c4b363..fb892b539c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:957b32697eccef6a34d30f0a4907381b388a7c67791efbb4b1111bdc51314b75 +oid sha256:c92a302a4e231fa32607bd255fb6924481c09914e200033826475d1c5441b17c size 413231 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6ebe2e8100..360effbec1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:270ba123b375550a612edfd6bceecc5b5433519bc38219857614110c70d34098 +oid sha256:4dc6fb9c7b0849dd252b97fa01ae1f4a7bd2872f9d47d4680a9cf75407b4938f size 347339 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 9c6430c847..2ea7f6212a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:659d0e269b8aecdc6ef7cb902dd4c20d8f908f56de0ad79ff4918b69a8075976 -size 679356 +oid sha256:f2140966ddcb35ce1364fd5308cd553f31b5ae59a062bc70b3907c21d2956f19 +size 679454 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 52dd4deadf..800046a67b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f4285e97634a9b9ad60d0cfcfcb22c9f438509fb59a13fa50ffe8223b8e47b7 -size 593211 +oid sha256:e3b0964adb85bec2af42401cde088ceb5e89b5a71dfe7d2b8db7913bf348991f +size 594099 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 92f0ddfb9d..8f02923d5e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3f1b5f12ecaa1023ac633172f562990226bc57a95d5ba34dd19ce2e9fce68ce5 -size 705526 +oid sha256:b7de65d4b14bd113a970446b9b57996b3b22b31944b9965324f8df9bff8bc1c9 +size 705674 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 4b450ca8af..b5539087db 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f6ab3d5c4cea447223338728bd5c015ce1fb4a5bcf001153d2dba7cf49223fc7 -size 619382 +oid sha256:38159d6367f4b58b510ab8e503d0066817536c3e13f010c51c19f29567201188 +size 618988 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index bbbcfe36b1..42dc151ba0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0013aded6d086b63d85fab889eb75276bb8e358c38fa91113375c68c2cf580a6 -size 683646 +oid sha256:5ea7083cda629bcab74315478a6c33a3d5e2293e9e3161952e2137b3d730ccad +size 683744 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index e01263073a..db317e5414 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:143c212fd1127dbe2c4a02446998b6a7f394228c763141b6e88800f7ecffdab9 -size 598833 +oid sha256:ceec280586831034056813e9ee6b24cd1cd4144741017568b87fa25327edc98f +size 598931 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 84ee79e64a..b6f8c8f2db 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a1ce0ea5ecfdb2e2be15b9ac94766fe59aefec09737b7622ad2ba70b2cca3d54 -size 711296 +oid sha256:0e1ffc57049ade97f5732eed69143520530403d13568d76c400bae8329d580d2 +size 712184 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 08233583e3..27d1db06d7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0495c916755819c0e161896c90c118e0a4caeb6ca3a538e12dd04e16118a8c50 -size 625250 +oid sha256:b7ea06f8540ef85b7b1c74074a87a28eab284dde445cf1addd9a2a7cd23f05cc +size 625350 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 3cfdb42fcc..d38d84cd99 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:08c46398a98d881c664d4aafecc4df36bd1fbfe2f8f5d210790000b534146bb3 -size 746382 +oid sha256:f4e98882eb90a15674349d1c20a5b2c954948a04d381d9cb74b4e95186ba4152 +size 746480 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 854a4c0bba..d1afa95807 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4e2fc41d143f3179b3798bc593565aaf2def8d3800e2a5c22f4697b856d37e97 -size 661866 +oid sha256:83789608eba76efecff8b2a80d51e831364f4ff18017b1dcdad07ff196c2d744 +size 661964 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 8645ebbf58..5ef0a27d5c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:420a04a3de0d08acb49bf0b222bb0aa2d7dae540a43ccf7e84c450b8772c565a -size 773588 +oid sha256:a4a08034f4865c16ff80d51acf4fe792cf81ba38ad38777fd0ba6322f68f5931 +size 773688 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 1ae4d1f5f4..80a456d2c5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:72b926bde762a89592769e2c2b28ca6bc754b2cf1492c8efba594a5ca2c9a118 -size 690652 +oid sha256:ac86cfb7c022e816ca0ca0637c9b7b26d0b6c35c3fbfb6867e3eacc45ccfcd19 +size 690750 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index d557f50a36..a0704930ef 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:332791bdf8d69c9ea74a1e931a57e37e59a6d2663324fe8987f46d009e99dbca -size 780392 +oid sha256:9301103a4cfc19238b0bbbda9babd5769a30c47444e4afd04bf307bd54828a62 +size 781280 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index ed7b40df33..d69a1e6512 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:faae47ee18435855ca8a8086d4f9bf8b202b3611744a68c15b3895155c963817 -size 687194 +oid sha256:7aebbcdff3ba169c806d657b3abf60ae9ddb67a636980a1b18a43f56f0b684b4 +size 687292 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index c3dbc0a607..1ee37c7254 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:77b68516bf4bdbdfd1d5f997059efd8963a93d1a7e5c7ee21aa5c29c4da5dd0a -size 803010 +oid sha256:5e238cdea87515476322c308d6062a06f457f91833f33e9b22b57e04ee21ab23 +size 803110 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index f1dfd20c93..04bac99d1f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:119b4eed7ae27b0d6bb6808ac0a59672aef9758e48f58460b03934846b7ce022 -size 711982 +oid sha256:9b672d9596963ec21374202d1eaf9959037a1e8d95bb2d01ff0672267f2a2299 +size 712082 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 6ecaed3976..142adb1d7b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e75a7df7ed10d6e3bb3e497342114ef267dd2f01ae8fb4488a55dbc465ab4514 -size 764802 +oid sha256:923985f1427cea665ad86624c37a81c699f7c19d1c944e3c21285914b5d6f3e0 +size 764902 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index fa113639a5..3c7fedbb2b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a8c61b96dc13a40bc2d621b7981f8a62adca3ca49af7b7a7641c025bd0f53fd5 -size 669730 +oid sha256:58e8aa3b621c98351375b2a0d59f0d639aa0cd0581d664e9a21a43c0a41e1a56 +size 670618 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index ce9a5bb657..70d3556c51 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b00c177aa664b2954768fa4b0d55dbe7867d6cde3d130149e88ca67953a883f7 -size 788704 +oid sha256:0f09e41fcb943296ce801357440809a3147035ef81b8dcfa33f722b4e35b81b5 +size 788802 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 118de89e32..a10b81a48f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cbca549b667be6b308409fb4c8258eeb374007d744dbb1dc6a9e0b43ff91467e -size 697626 +oid sha256:92cc42bed4cfb26d96bce9944940a2149601d7e111bd10c9f46a68bcda81c7cf +size 698514 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 26ef1234b0..e1fd69e12d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4bbbf7b71e7c4a30493a5a38e106ef13e740a4f73d77e1ee6e3adfffe8d47ce5 -size 782610 +oid sha256:e71e7472acb39d425deb76d8dc40f9505e4a6bb7c6237fb96f94852d0e61f9f8 +size 782708 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 948bc67fcc..c9183fffa3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:93f84e6503970f7667a1e0dcb3e5816e050f5e01978a6d7713313f8a469a7197 -size 694048 +oid sha256:02b2a923ac3a96f6cf38f8262ff599f677242acab403b01003e179664c68933b +size 694148 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 7abd895353..aca17ec3d6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25d633bb425828e79c01357e16cf26c28b25ef109f2ae4fa9305e63be252191f -size 806708 +oid sha256:94dbbf30fe68d82d96d5c8e1db5f61fa05e95ab399442001d47a1abb3758978b +size 806808 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 9b82429e31..e7f8756ebc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:130e9ee2de9129e47ca78a066485a8089fe53640fdc1a351e27d72fc4c9c4a82 -size 716520 +oid sha256:ea6ccf6032ba591c4ee0595b971e348a102c468de2e8fb8ced1b5344036be251 +size 716618 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index e667f1d6a0..a2c610dc73 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fef5fb0c8818a091d944b3e44cf73889d1e5bc2550937b16d25a9e9b4954a889 -size 765048 +oid sha256:2777d0bbdd5d7b8b5695be6933894fbf24e9ab6d34fe1e5ad30028faae8c2c18 +size 765146 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 8e8ab01d94..0e6e7c4178 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58f6b9b2def4a9baa269305322542ea997f412dab4523d056ea1ad7fde190745 -size 676584 +oid sha256:b597cf3482034edd5db80ed80cc09123f4d06323c02533732ad8b1cfb83de1c9 +size 676684 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 58c9ae43b3..b43e30d6a2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f7ec5c8d6052ea711cf9ee62c9ce5e651240ccf2a2c3c0733554115a69ba2fa4 -size 792402 +oid sha256:82472f61e1532f1eef7d4df1b6f501fef70f1056c6be5ec249ac7db9c87f17fa +size 792500 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 178b186087..2052b4151f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bac744ad4a445d366433acffdd0e5a805097315ca54066b73f2fc71121841420 -size 702164 +oid sha256:8a38e519453306f2eb08ad4ad5c0fe8b05be5e3b94fa8310d53a0820cfffc024 +size 702262 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 61c55cc5e1..941837740f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e3caaf6460c3e98d09ad567a02eca88365c337d64ab4d885ca5d510319b0e6a5 -size 851020 +oid sha256:3aca8fb09a701ff916537defc4f1d32beea54e842dc23675c7a3f568fac70af8 +size 851118 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 84c83a5bc6..af19b3eec7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c8abe9b6108b406fa8694b75e4db31122b262f968603de3346ddc008805fc22 -size 760238 +oid sha256:3f9a2b3a617f68761442d7f1a9ae3f3d4ef0eab0dae9edb7201f4f905e803b8a +size 760338 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 02038e4874..e03a19ad55 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:60a4b1a2557b185eb892637d6d0ffb48942ae8d7664374da3d719884bc2b78d0 -size 872554 +oid sha256:bb804a710d630635db86c898a11a48c96cd9a69bcc889dc01f5ddb12a6c54154 +size 872652 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index cab973d543..aa9b145677 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c9509e67b8e571c5275cdb0ce03f9a4ffb84b799900eed2edf46713749649904 -size 783942 +oid sha256:db3fe5dbdc6b98f61f701d412f73b2b835f5f2eee322c68fe8579645924d1b60 +size 784042 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 1af2ab840b..e3ebdf9449 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b98110a51316c2979d8199b53f29a59bdf129e7e1fc44dd73ec13c53593f4056 -size 834000 +oid sha256:b93b3e5731625adcc68a7b9b87aa07f4bb84d564a6e4f6dca346e789ec9fcc2f +size 834888 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 2dd8de7769..e6d126bbd2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6cbe81cd4ea7fd7c6c9e01994d5b30823360caf263c429ece8fde8f88c38535d -size 743614 +oid sha256:3ec79331c654cebbe47073837b6572748d2e1173148782fd6b0d329ecc8767bd +size 743712 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index f3bda6019e..35027ceda9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1cdab214cc40e05f50019a3bf41b934b76570390a94dd72aed1024631409623e -size 858246 +oid sha256:231ef67c8634193bb32b9c15ce5719a5f2c72f4dde3ce6a70c1d497bf17cf667 +size 858346 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 936902cbaa..59c9cb7151 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:608029ffadc94945e7a90621237517a494a47e88f33b4506944fe9bc11ce8876 -size 769636 +oid sha256:dce9836c41878a6f0dbe74b0fd0b7e702d800e51ddc20253255a502d320223f7 +size 770524 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 799f2bf4c5..376c7f2ef7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:760f3e2c3c146e9b8330a1881e4491fc37f17e5eb551c34c23ee2ac2b57d7197 -size 638162 +oid sha256:156c716f914ca5f8a0266e1ebdfa6d004e6fc2d59c314a0575bd98ace3a372b3 +size 639050 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 121391d140..3731cf0398 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f16015dc318a975811084a641a7179b7ab673dfb6b194d2b914c09dec327f6d -size 541805 +oid sha256:f6c8628cc8f5f332842e8adc17a23d167d6cfaf21c36632695b77efa490edcae +size 541903 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 55e5569763..58fb79df8d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6499c8656add63b47d16961e398604b30d0527c8ca35a295b89b92ece595357b -size 637420 +oid sha256:c9a693af6da61d72046b5974559369ff35641396dfdb95647aa718eccf76a364 +size 638308 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 8a20714189..59e3bb6b98 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:55066312317d13facf0e044d997014b99c666be3dfa90c0e8db2e69a1dc9e1b4 -size 556603 +oid sha256:bd99b783b95a986537df7fd914196ffbe54f0456a3b420b3eae69ceac0d03a95 +size 556701 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index c44985c761..48a89e819a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e6a8622ba02ad6787c1c53f8cacc6cc408cdb942c50c4cc50ff3c540da92c160 -size 707260 +oid sha256:18e038fae1bcc97b7ceb57b3642dbd30a1d11a529c6c60ecfd86526820ec1545 +size 707360 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 4bc06f5049..789e37023f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3460add8403b1b47eea5b5c736f24ea2d6ed014bf106e11c79fc5da5bb6d9112 -size 610115 +oid sha256:6169361c510ba9ea925f204ba188cce62c0063f9b52257e63938beb31ba79e31 +size 610213 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index f914ea21f3..5ffce24eab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7c0f2b9474fd5cf7810643e906c105b8bae716ddaacc2ebac98d91a1d09e9abe -size 755330 +oid sha256:5d10238061c335151bcd99e1799bb49c40fcde3c70536b87c3388a9ee54e3ff8 +size 755430 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index e1db706bf5..4268eeaebe 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0f85a80a973de62ed4d38a7266f541ea3786ab4fa4559f5e53789bef51f1aea -size 650390 +oid sha256:fe0a6414d7f21d8af3fc540ac7f356f5f01f0b66cf70e04bfddbd49451207a66 +size 650490 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index a778d21b00..156faf6551 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:36b9f5fb9c557363f3a0b8ba1c62e54ce0bccb3f2bfa7464754da72b83fa9f75 -size 727506 +oid sha256:c071ff3f39202959d1cb94ad0ddcb7cf2f36a0b0c9af5a68439f86c6d7db9db5 +size 727606 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index c6040965da..8598f6c4ed 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b7186847b97ed9ae5421ccc93d10060edfd90b4e07e0ed3f43982e56c3cb8d48 -size 623356 +oid sha256:cb1b4b2139ff424c85479537faa656e5fb2dc2e44e09ff213bb2772d0c00c008 +size 624244 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 74c031807a..3cacce47c5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0503fb38fe384b0c71b71c0989e692a44371ca055bc871a91f3f17cc796cff77 -size 749408 +oid sha256:5299995adc6b5b39532ffb45f352c79abd60d79ce0d4ed1446517bfbccaf409b +size 749508 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index cf78d1814e..5897802994 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:787024e3f72124379538307c1a96140b1795abcfde1602f78c2dd3147a05da2d -size 667458 +oid sha256:a34fe0081156017e96f1680f682cacb242814e64dd5eb5346ccce12acd0a07d4 +size 667556 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 90156c7fd5..0fa99e163b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e4dcf94cd69ede8be5a54c56c49582414600af2515fad8fd7f91ec9edea7646 -size 721634 +oid sha256:21ae3d91c11df614919754341d6f74fdb17c222f522f3ead6f400d9139d61882 +size 721732 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index b3c6c33628..a9040f112d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3697920ae0c5774bc3c0bb8e2bdd760aabe2395b0acf7ddbdd8c0659f2a29a0c -size 639684 +oid sha256:1f8973751a0c0cb6eb8b0b352f415276271fefb4ca5893afb27bf17af52805a6 +size 639782 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index fde1faf2d2..f2875e6dbd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b02c1bc9a2a2d0322dc88364afc36388b60058c22f8e838a50ea0a5dea0efb1d -size 819742 +oid sha256:8a1cff0714fbfb9b73b8b902c787f72476c2524529d4d41adb23a5e4ad849ba2 +size 820630 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 07ac130add..d173176a40 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:11e13a555135abb86770ed8765d2e6810df64a09c3a994caa77d3d9b908a09a0 -size 717762 +oid sha256:52728aa47971c372bb32f44f0b3ed1d5b276a3dd08179176baaf8ca4a9606f9c +size 717862 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index bfeda22f0e..046dd78c9d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fdad95eb1e3285f065d50cbef08e9e62da8fa9291c4d44c4ed388eaba12de8cc -size 791918 +oid sha256:fd58f4d0d27023b0f38cd4196bf374b08ce7b04e1b6455ad5c871520dc7217ee +size 792806 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 429167cf0e..a35d704a41 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6b1b545f503e865aff2c8e48f43aad283cac5b25559998484e2458fd3421c2a0 -size 690778 +oid sha256:c4acdb4fdf46594dbfe74f1031d412a4d4da93ad31fd84ddeba0f7ae21f6acde +size 690876 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 678de4228a..4bdff7be07 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3c7a32daca7774d18a95e3e65b4a3c7160d6d32b6fbf3429dfa798f0d93796fc -size 637812 +oid sha256:06eebdf0c5aaf57e1bd65d3047e62983859c294223e2fbdd9b6d95863a2fd150 +size 637912 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index ce3c4f89f2..735b6d7202 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f2aa511ade4f69f326add851fa27fc9a16e969d73cd26d63eacea8614e0efe1e -size 552605 +oid sha256:d9cc1977197125869fa3638c6c967579d01ea8c923063d2dd956af76780822a4 +size 552703 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index a98618b08b..85b6c945b6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ea790496cbf37ef818a6466271f1b456acd80b79a52780a3e662eae13c292fe3 -size 668572 +oid sha256:f64999e0802c8d2cf1d9a1f668035ac68d08b48a74728c9367cee5433ce5f89f +size 668670 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 99f0adc793..0f732b9a07 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:acae54a2c02bd239ab1fbb3775d2b361b4b2ff5e1ad23eee580d63ffbbfd4436 -size 587063 +oid sha256:80b9f52c450414bde61fb93e7b64dbe69de936213978f8dbb68dc39a9be6067c +size 587163 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index bd7cb8b9f1..918c3bd090 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:637f4f1dea6232033953800ad785ed1b0b503beb58ecf6e386952eec69af71f3 -size 637810 +oid sha256:6d546d1590fd26003a2027e3b830f95ba373278263fba2c06fe82062d7aa86f2 +size 637910 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 7c3c15277e..a9a6b49f4f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3db85fbb5465e65fb533d10b1518c629c7f74298acedc2fcd8e0058c6881984b -size 559213 +oid sha256:c1ca44c37a5c6eee17e950477285771102d864437279089b3bbb087039c134b3 +size 559313 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 6ddf0479bc..f177b6b325 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:620d643d95d0b54f3ca87e5e3dd8ff2b34a5c74e098645d77c56689e29cc8141 -size 673798 +oid sha256:81a2e7368d6e2455a31db9ec78e6101103b50387ae07e4bd6376df310c449373 +size 673898 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index ec20c3c4c0..4447f2c2ef 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b3867960bfdf3aea66dd5c63a25316e89f4e5fdfe1adcb2d179bf78d89854ed9 -size 591797 +oid sha256:2d2094f1483772a72d2cbaab67e6434b467671a9ff01f28e98bd1ea35b4f0130 +size 591897 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 86d34f9863..8738b91492 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2237eabaad2238babdbadd41fad8ef141571d74e1bf713eb601dae192dc3d3f1 -size 704840 +oid sha256:31f17d01d351473b09645c87f3cc47cfed9d3f63a85fcabf94d5fa192f605bea +size 704938 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 6815d433ab..b4a66b96b5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b12f186f9032b88a8b493d1f3ec955358629c118738f91eced60238d3c03f35c -size 621556 +oid sha256:b48d9baa33fd353affc052fab885878e60c5fbb80a11dbfc3dc97cca8b203138 +size 621656 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index a167757cbf..ea205e048a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f7d7ba5ef0c64fd8cb5cee17f815c785e5d957209d337fed608062e6dfee250 -size 735796 +oid sha256:d214be9e1602dc41920d4876084f0ac8944816a792b93303d0de823a0a198d2c +size 736684 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index aeb2eb8250..5a801354af 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32cf046ccc94fee2cbb0c442f3ac23d02b4fc3a7c56beea5a93eb5c8ddc00239 -size 656854 +oid sha256:085c0a36f1e3a0818003af15cead7c4468be56687be6e35e3a8d588f302482bc +size 657742 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index f0554df6cf..f4b07df5bd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de3c83f8d049ef76c5b2e11b096b902b50d55776c62820f70e8a2ccc45dc60e1 -size 725628 +oid sha256:8e1c22aed3c6e498d3d9d6e17f523bfec06ed7b4406c17ab18c000f07addd7e4 +size 725726 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index a37898978e..4772be11d9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4d757b5e1279b3c727cd8cc253bbb61e9cd5a829b98919717a2b5293332ebb12 -size 632578 +oid sha256:0f930e4a664a8372fe1eac5d93e6a6c112a143463ca638a8eb35e65c45c34371 +size 632676 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index e325c7e0fe..1db299e504 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a877f77b9ce88a6d2d8e29f79b8e23b61f99b6dad9baa92dc7ea05e34b9824f8 -size 754758 +oid sha256:c8e9a44d65f5c7f377abf7f6251d3ef042a4c923381db0960298d4a05fb513f4 +size 754906 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index c8cb493a17..fa0b1fdba4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f6b2ac0d4da937b9c810a2283961617ee095396edc02f50ce6a791d61510810b -size 667480 +oid sha256:7260a722bb721f369fa40ac6457799963c5bf78356fa1bcea3ea45e4f96865b1 +size 667578 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 6d8d33408a..c2e0880102 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ecc5cc2750790a6f6d2ebd063d85c01dfef50ae69a7cbd5af7970577114dd952 -size 718474 +oid sha256:69c688148fc9adb9d0333a37deb41b9f2ae89ae7dd9bb754ffe97a79a74cecdd +size 718574 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 1328d7e231..7037895b9d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b0578fa49ffd88a5f99dbcf5070bc0b150ba1b4224f4b7675ae9c398cac72806 -size 625474 +oid sha256:5b7a7c2a7b8e47da4ceb4926c75996c3cbfdc99984160b720b9797b454f6f3a3 +size 625572 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 28106332a7..eb1b1590bb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ef87b94124d98b62cdd336b869b677a740630ca96caef130783168845ec24a7 -size 747654 +oid sha256:c14e43243df48b3dc09bae540d6cbb9b9740f218d23f171466a4b8035104bf8e +size 747754 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 0d8fca6da2..aec74f8d25 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f3c6322dbbc97a71f7ff7815f9a77eafdeee70ef61dc25713672d11e55c27c7a -size 660326 +oid sha256:d54b04298182c69c5d4347136ef9b299f4a1caa264e473c74f81a383ae3a271e +size 660426 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 5ef9986844..f528e925d7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46676bc55ac44dd5e50955e6456df72b5f3d77dd56b2426052f88fb4896061ad -size 722222 +oid sha256:74ff93b1367d0cc234c8e85dafd4d68ff4c0351b7e74f28324ce7aba83c52fe9 +size 722320 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 4ab227c9da..ad4d743b20 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:51a735eb79b4296019d3f02cbfec40c7de0d139f57b63f8a49de7f9251083161 -size 640320 +oid sha256:5e84d2298a3d6679fd0afb56cc8bfc06f86770ef29b7c454d15de2dbc2edc38f +size 640420 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 338ee21d75..a8ae76bcc6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a5fe6888906ee0fb2c9d2e50a56251c04cecfc8d8c2cacb1bae0afda85263911 -size 756928 +oid sha256:bc0f24a66570ae29b1a9e9e16420c1f7d002d9c71252edc746bea80497525824 +size 757816 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 1fe02300bc..d47113d8a3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b740f3786bdc24915a278b4be3629fe4b24cb1a5e6745db1ebc0951d629aadd8 -size 673102 +oid sha256:efb4be1a7def81f4a3a01c7d00dce9ca78a159bb86da106d55b76825a39f26aa +size 673200 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 06905d1ada..19381b90db 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:07e663d627c5cc009f70dc399fcdc0ffb7c804ce2af21f3befb915db001a4b95 -size 715168 +oid sha256:1b1f150f7dfbbc2d5969a88eeac3e1d6574f7804c4e261be55e39e7ae74ffa41 +size 715266 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 63de68e5b1..bd246ae2af 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f9b7a35843cbcf15824a6756974471035fae660fb9e2abbdcc2d25e6202cd15 -size 633168 +oid sha256:c6ab28aa8bd83340a93282967ee8ecd4d72d876f16b43eb685c60e79ce3cac9e +size 633266 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 0c72208f3c..bce59bd0b3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b4ea48d509eb9b7d9319d92597b6f3dc7e1e36a26623f609329bd16537af8418 -size 749872 +oid sha256:ac0bcf74edb4ea70741b51cdfa0c96c3b6e3f5f49192e404d671b089ef7ea19a +size 749972 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index ae4696e0c7..e9c17cb78a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b2fb04f62b9d1dbf4ff50c1252a25351bc77a53ee2718c82ab390a4f07213bf -size 665948 +oid sha256:76ab5be650833f1fe553e910cafb310e0707aaa09f9372b4d2191cef9953abfd +size 666048 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 00a9daf700..6905722ad1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:288709e94aeaadca0b13939f31daf0585732be53bfb2e7405bf5cdd884cf4f6f -size 794578 +oid sha256:d422c9f9b73fabba9b380758700aae318486cd9c1e9250cce6385a6301eb56d8 +size 794678 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 0642d2ee8c..02e80af8d5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3a8fb35bd6f719d0dc218c3482a0d2d2fab4b54d4706600a84f9b58a57df40be -size 704734 +oid sha256:63db37285b226cb534484211a980bd2d1e0849c45f09440b81dacecf49f9a081 +size 704834 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 06f9344d3b..0426cb978d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee514c249b6f56bf1ea269828489fed3ab780743e575446b8c928cbf8cd028ed -size 823858 +oid sha256:8dcf2f60098ec522c245990ccf0d6793af3613c573898667034c287231d66158 +size 823956 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 8cc3962350..bb21079a37 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58743c878e5a4b544a95c5c016c0f768acb28613b056f670f4170e7869382a38 -size 738404 +oid sha256:085d320728482faa87f323b2b9f6b120c9593302adebe8d81e32f67031ffce13 +size 738502 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 553884d1b9..a4892e3d10 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:234e2f18d105429ecfb3eee95c5e87c9ed1d3c39d5d7e7250a0471f9733d4ed3 -size 787426 +oid sha256:4fad07e4215e416cae16fe936ea34c60a8fbdf430919636f8f851b7c1613ec20 +size 787524 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index c665d33267..c0979c617e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d9ec27881f72af48cb451297353f6738bf7a244dce23d326000486b8da0c74a -size 697582 +oid sha256:cadb6533086de73dc4e7def4b489c654165d3a069bf7ed8a891af10eb789576e +size 697680 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 243793319d..0a9d02d7d4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58241046f8ae26fb34df852173c81809c89f8aeb310af7e6e3b1511586ac8249 -size 816704 +oid sha256:01b294459a66aaf007dc7e40ca4a768fcacd2a3eeaac79b5ca4266095d8835ab +size 816802 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index b5038a0c6a..dab0b5e7f3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f9640d1efb09e4f214b94b672d467b04e87c4d2dd2fddccf56943b8d9a1f7d5 -size 731250 +oid sha256:0cb139dadaa161f0ddc05c4a269c10245999827680b5c84aac7314cd351b3286 +size 731350 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 463971029c..ceb0fce7f0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c4af0e9bfb60bcd44afd195ca286a153db8f69c7fa931ba43d02b40529a8babd -size 715974 +oid sha256:a33c40bbff819f5643dc92dd26dcfb75de7efd8fd6b176bff29bbb6627096d43 +size 716074 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 48a2b3400a..c9d319578e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ebf3a8667f60a0a4e5b6d5978429e9d61d43810ac6e1070e5cfc730948a01504 -size 625046 +oid sha256:a57ddf3c7e5a0652a4f15ff593b63ed7c70f3b59f612bd93059fae3fc25680de +size 625144 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 74edcc85bc..bee980f480 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:158f3fd6811dbbcdeec07b07513a742f24a6ce08a906989ef9aad8e1392474ee -size 720116 +oid sha256:8657b7f65aba501c8eb77ee042dabcda9387d179404342391a7d59dab29ab9ca +size 721004 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index c74a49a86d..97e2cac71d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d8bc2e1d9ec012aae9d3abbca4c984dbea130cbe159b4c2b0aa7a5c22317d829 -size 637130 +oid sha256:36e584b0bbddbb246cdec752aae37669fb87f933a43657c4ed865971d81f86c5 +size 637228 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 0f5d06d93b..9ef1f9189d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6414125ee53aec6b6e7e8e4d234181caa6b60ffae09f130bb9be290f0e044d42 -size 823228 +oid sha256:366c797e321151cae9fe3c107599f93ba1a0ffebc8144b82610fcdb9e9a44bf5 +size 824116 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 428d3199bf..83f409a11e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3e08b6b93383c0cef67e366fb78c73921668f7e7dc3fe31842c2ecca2f21575d -size 727760 +oid sha256:98c5c6e980321a791254beb79d07e94f5a9a485349a93f3c1425d35d38f8a3b2 +size 727858 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 672dc2b2e5..5a0c0801ec 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:48a82aa6da71fb1579892f9a0dcb940c759aee0df1cd453e13d7517f70fed975 -size 804184 +oid sha256:b3af0649e21c100c133b9aafd8b602acf9c192c21922f98584b46aa4f7e64869 +size 804284 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 5dec065125..cccba6306c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:60ce6de87b68022398ab01161ba95a4098b0f2cba7738b045c9194c9cb7fff81 -size 707040 +oid sha256:ba29d344f332fa1ae502aafe4130db38dec7d04fbe03ae99bd87d6926b1cfef3 +size 707928 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 4045966f04..f9dea40baa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cc0dd3296aa3c6181bef68b02e021f05ceb8c04f23c7ffab34465e662a20b80d -size 826630 +oid sha256:cfe77b2a78fb42f7a31379a2532b8f9ecd8012b4e7d8521f956fddd11e3806e9 +size 826728 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index aa2defedc9..77782bc2da 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:131729721ac989a8fa3244ba4cb303a29192e728ac63b07b7d6806a1f74a2b57 -size 750894 +oid sha256:0c559dcb99f4650aa504905fdda880d8da7102734705a8dc93829157a159fb29 +size 750994 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 85de3ab2ab..c803bbfef9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:260c95dbda696f4efb1a7191a349055321815a405315204f332a4bd0bc487d18 -size 806304 +oid sha256:e6591c2e33de4f620274fbaa166ea134652ed7d035e0a1ced28148120f8a4c46 +size 806402 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 73c5eb3bff..108ab246bf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:42b073ab3dc47f686c0c1266fb685ba27e363fd3c05cfa9795a01b6b9c19d464 -size 723810 +oid sha256:5a31a869fa23d9e3c872692c4422d96061fdcb567ef1fd1ee81c4cf2219d6a91 +size 723910 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index a520afc287..78787b114f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e2ca2237bdf9521fceef5c9fb246277e956163f170a0b602a9ef85684e159fdb -size 716768 +oid sha256:84d0d82de75afd5aa1d433e6ea72e0ce83b4d02d3539ac22d40b8884cf61a429 +size 716866 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 21e2ad8eb1..04dbf6d275 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e93a6b236f031db895c7deee09afac701cb95510c50de8df4926ff56da52ca9 -size 625838 +oid sha256:ec851b3385d77cfbb9843d1b2f7c28eb45034b4632fdbbbc3503190ab29a74a6 +size 625938 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 79e452434b..b5313e861f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7996822e93ca7729edfb78f63d919d5278642748633f499aa5ff95d4aa39ef07 -size 720910 +oid sha256:9350012ee5133a755c0ac4a0a8a9f42ffa181618aea20c22f8cceb499c79e9a6 +size 721798 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 20a0177a8c..af0269635b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3a0b9f1b5aeabebec5f9f20484d70a1d6496ebceec4e0fc4e7e9377adf464be2 -size 637134 +oid sha256:fc37d47966a97b32ad2fd4129184b67bae67cda602126e0c71f75bd005c80ab3 +size 637232 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 16c7ddc38d..50f0c06e67 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2489e5f7fda1562cd169181c1cc4e871962444da6fa9ace98ce2eb6cff786049 -size 619308 +oid sha256:463f79f684a15cfa1f8446892851d0bbd4992babc17d68707abb3e3944fedefa +size 619408 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index e2e14ad301..5719a4ab59 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a7c2f60d1ea9bf5b080a3984086e6b551b110c7cfe81459c48859a8199d9e93b -size 539479 +oid sha256:4dfcb71b2f43881f968591dc33adc790f43fd32b021db362595ff8c15f7e6610 +size 539577 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 5c47c3f21e..144709c0e5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c28d89f9834c6a795b4e976200a091e9c4412ee9cac42b8c3cc79b3f34913f47 -size 644394 +oid sha256:f058f8d97b84433d4559a77d570eb1f2c2389030db86b5835f513e7b723e0724 +size 644492 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index b43ba35466..a2e13fa68b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:261c8cd2a1b00d11790faa284ac9cade0d874ff3f7a1f5bddb392fb073af7b28 -size 560075 +oid sha256:a8adb1fae14cc9b7410dbf8b351470d633473eb21f89ca1c66b8b37fd39da5cf +size 560963 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index c20e8abebc..b520553cb6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:62aabdf21f0a4c808b5bacd442d30a1b8a9783a0f9a97a911b22ab461e4f0820 -size 624190 +oid sha256:b03a35e7e2111b98c25770ad9887f336e21a6df54a58e266c8a04d1afdbcefef +size 624290 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 7b6cabde01..7a09c08f75 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f62dabbb8a4f2ab4176298ad7790665d15ef12c8d7082a97d0a5b16f46ec2a0 -size 545445 +oid sha256:2293d62f9ef753c20f6c1ece737a38b36775a8c80fed92d21d37f7f8bcef1086 +size 545545 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index bcca227b76..267f63cfec 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:388b2acb153f991a067ed8089a00a8b587762c58d0bf55466c82810982e48af1 -size 650066 +oid sha256:7685a8c7530b725ebcb289d74a0a767705f3cec709dac1ea1c220e9b2ae16a93 +size 650164 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 0fb3825da7..d3b721ce8a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:669af50ead0532db11e01ca4f397badfde935e4956f47b560f3f602fb72e5e07 -size 568015 +oid sha256:8a038843c26179913d596b9e1a00e4e1758307bb6cd4e17393e1ab48ddf2b9b0 +size 568113 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index d0f443d883..c452f99bbf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:077f34b907b6d18f23ff59d99ca3200e7266a92bec8ad4fce9a91cc260ea0721 -size 686780 +oid sha256:dbde93cd9a21f0f36de4747f96687aa7094775d5e732110857697269338381ea +size 686878 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index c0434a8af9..d12b547801 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:569a2cf754a92c82f26640cff7efac2c677ec90b73fb2bc1ce63b5d179d878f5 -size 607097 +oid sha256:30c518589dd3f2433c62ecd2d7e2e49baca45d4b284b519f9216efb78c4bf216 +size 607195 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index cf27008806..d32d3bfce8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ba705042a0461be807ed1abef0e64e0a56f1cc503f133bd5f1a38ee220755ec -size 711618 +oid sha256:8d70516eac2f6ebb10533269c84d366260ea76ba6f11aa9a29a2e6e878cebe0d +size 711716 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 1099d38c79..a918cb59a4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a8aa527153157156c21a540b3639f5f325b809515bda8ccab46df02a3ad3f170 -size 632380 +oid sha256:9398946ea4e1ea33488a6047964d355d2875da02aed7a8a54af8b34c1159a09e +size 632480 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 6964ebd6ff..c434633655 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:51524d11df683ea16f3c0341f98350b9d2d145fcac57dead5d5ad7aee68f7421 -size 705842 +oid sha256:0668b97ab2a8e83d83c2983af80d6285c18fcd7bbef6fb184e4b80d7975b8873 +size 705940 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 0e1583ec30..19bd169efb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a70ed2b416ee50efd4a04b030867781bc728905610118919734774dca2cd9dfb -size 626062 +oid sha256:2ae1ec6a3540d937c210e40529f991cda25652535b8c231349b2294e0a4b0f78 +size 626998 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index aa2ad3d534..3fcb01bb70 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47b4a437b2630c86b144803a8638fbcbde9c68805a718807edfd3f3121342901 -size 730482 +oid sha256:392c80501a244fd816bd5131b18149a458a2f261e881cf88d9e190560e01d1c9 +size 730582 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index de73168291..e2be58e1db 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f319e10be8a4d9c7c6a7e4b4903cb45215a871bd60e5981a6e5a6b17dbe2ea6 -size 643550 +oid sha256:96006ed0d92baacf2f3ba875100a4dcf8bf0f656f87fc39618dd367ba981889e +size 643648 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 72e5d19064..bce5d3ed6c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d3f335bfb49c37d557ffe821c89c9f0585db05edce25ff066eb60da7408bd833 -size 698688 +oid sha256:a4fe00f961e331f64ca61883d21776dc071a5220f3b8eb74723bfd535ff6761d +size 698786 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 474fb9ee7c..773b77a032 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6267d242e0d151b0a02070256c8e12b469aa62cfc7ac427cb7a91e00c9cc75e4 -size 618958 +oid sha256:844f75f27ee5e9275c92bfc477b681125257a890819fb4f64f00371250dadca3 +size 619846 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index dd667a4dfb..5f5cbf5ea0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cd66335f7cdef7d70c771ced8829621477a60079770cbee13fe9899e62aa274c -size 723330 +oid sha256:2743bd8ff7b2ba818e7c8a42162ae9955c1b522349f58a3221fe998f9c8ac154 +size 723428 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 8043ab81bf..2dbdcbd43b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6f8d69b58bce674ec1511d8e42008c0cf460c87a2bd39ebcc34c00516179b58 -size 636396 +oid sha256:e60a55852f38ff570dcbc0235b22b3ec6abb50d202d8f38863f31fdf1ca65dae +size 636494 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 86b31aa480..4371d7f024 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dccb03de9cf72cd243f9eef4bccd9f9fadd8e6764b5de1ef5127864d50c4c7cc -size 708306 +oid sha256:8c5bf417f2277da927d5de9d64d72e2c2caa685a4fae1fd78d689cb3ed536b45 +size 708404 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 8a06ee4e8e..8d38166831 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6518fa95c996225e8abcd4887a806bb0cb6cd50daea7de6735889a87768aaa1d -size 629710 +oid sha256:185000dad6220e66329910bde74eb80a9ef1be020f2055dfce2c309e3aa47017 +size 630598 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 22474c6df2..65f9576cf2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e651e19475d0fb3504b5ea0609723b23b7d8bf2af1d737053afe916f0d9b3125 -size 733244 +oid sha256:79950bf9f9e1f46d5dad74f4b85ba36e9f913aac988648fccc3394802dedd313 +size 734132 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 7508d3b6ff..b272505003 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b75a86bbc447f9557b4844f69f66ff144681b9eae5376183d2a614c146e9fa85 -size 648234 +oid sha256:891cf3f67e1c6d1cdec54945fce1066fdb362b20f605481a34c86df912d5b9ec +size 649122 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 5801153344..3e2498de15 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f73dfa14b35b4bf4e5a7c569f82f2f94658c43adc82942ed52f6613bcb1e5aba -size 701152 +oid sha256:26e5ca6cf72c22e32630a4e577ed49a4df121859b1c8618950433b3dc8a3d6e6 +size 701252 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 99f2c06105..4f17061f65 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6d000f08f3495ad65955b68e255c7b82a904c8379a8617e3e5d300a5de1c5923 -size 622556 +oid sha256:5fd69b113dba142d210743da2bb4fe693571e0550d01cdf0a6782b4773fc8750 +size 623444 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 55aaedcf4c..fb8d94e1ce 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:815fc9ef40a4958b4e7ecfa8b4e94b1a1063dd2e398fc5ed7b68a3f04056ff58 -size 726090 +oid sha256:bc19931108a739081457b6027fb5b7804f77a90920c6014c449838fe124a6caf +size 726978 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index ea8f51a984..376901866e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1c9fef2fc32ca5e613b60529c0da903c8b37c9808a25f83f818ce8c1b2a791a8 -size 641130 +oid sha256:e13ae3033adc1b33c76fbb82f2f175de9c9c10869721bf639329f3f55e19fb5b +size 642018 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 06b37f9abf..0b71319791 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:30caa0b4be58513353a1d725fd0e0550b165848b0da07d4f2d02827ee26f1445 -size 774842 +oid sha256:eddc290f3ee413170e6ac4cb2c7c54a58b6cac0bf232d4cb064bcd215edef68a +size 774940 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 08ad029852..fb43a2ecb9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fce77c558e2895d0bb08c4037a6e9f0f16ac809d386b6ca253250c79534fefd4 -size 693138 +oid sha256:7e8c3fdfac6b5bd0c18a649f4ca7f5010891090c4b95ada78dc1fbcdeffad2b0 +size 693236 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 91cfe47dac..c48707b369 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7c7c62438f428a24470a0e2554e0e1d61fa69879eb10d34aa3959dd8836e83ac -size 800716 +oid sha256:cf5eef41520bf6ade0e508f5b5e2525c75049f8080ccab81bb4613934d1a4da9 +size 800814 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index df0e6c5b8a..2ffc891d38 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:192890fff13dc353e9b15736048f0c2e01f1a114e53717a015cb7d6add0735b3 -size 715214 +oid sha256:170f47f7aed3c1fa24948df2afc3dcec410b0035831dd35d01e5937dbc93b331 +size 716102 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 7eee3964fe..7a24ed9c54 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f835e61c85ba13a8e3973c2750d2d6dd0653f6f1550decc0fa77e12abd573f9c -size 767688 +oid sha256:15eba449c018a52877dc4128aa39c2ed77c5318e77e98cfdb35dce210403f3eb +size 767786 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 1294201e23..516924d0c9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0e9ec16f9f6d2b1b099b45be6e1ecd33cb8030001d7b1b6a9b25daa76f3031b -size 685934 +oid sha256:b241b52d35e730c5a89389d46cf4e4082f311e89ae3767cd9a60e6d56ee6b661 +size 686034 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 2424676f22..c7af2a9e3b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f5598a7a5e4246c3ab25f8545b20b5d409d4f1890e3d258d765fbc79579ed29e -size 793562 +oid sha256:30f1bb9f05fe7728725957d3ff487c1f55036acae12f390e6409a8e3915c207e +size 793662 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 4e7a4b3dad..1e180553c5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:45ab96bb14c9f2b6f9bd5d29544a31aa269fc2c95b7c72229456be5d9f00938a -size 708060 +oid sha256:0bd59af56f4645d115421b58970b3b08fdd70ee5edb053952848237e274295ce +size 708948 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index fde92ff651..b7bd03d9b5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:af2dfee64a01d75c59f29857cee6656417ab8a391061aebc8b842dd7f46910ec -size 616003 +oid sha256:22ee881936db977c377aba54b0a9c955722ffa09197d32776a9509cfc1bc9e95 +size 616891 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 72a9b5f2c0..f1a6008aaa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1459d6af7a70d203896e2977cb7c5322e4ef6ae53650c614858f67ba01b2ba76 -size 529315 +oid sha256:c47301b76a948141eccf6c9d4343c331ecde1d30583bbdc4505270d28c107833 +size 529415 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 0789ca878a..7c898613fa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:04c5a8e49cb29da83ad7d1cc0dc439534b43967cba8387f729283b2064d57b89 -size 616543 +oid sha256:502d9bafec999fa0af4d8af4e409f0ea7076925d5445835d705754482f3fadfc +size 617431 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index d6246705a4..a5628a9bf7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a2480368b26fe4b5d4d68d82d9dc987686ef4895b1755c82d15dff1cea6e062 -size 531829 +oid sha256:c7748a0b8a856d012f8f0e00b769d8f1e046a2767e31df29c639e835e633662f +size 531929 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 26096fca6e..e52776edc0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f781af2d901356c6599dd64a74eb901838da7e0da49b0ed9080fd1c130a2413 -size 680810 +oid sha256:90ed80ff67e4bd29698e098c9da57e6a914335f54fe2bf94a41ebf227eec225d +size 680908 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 780bcea70f..4c074577ad 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:26db7ca567af037fd951207ab70f00f11678573f9b0b70d7f77f02a8a814cd6e -size 595997 +oid sha256:8ba6913dc2d8f168941d723ab2d22862edfbd2d810a77f251eba8ea1ac6668d2 +size 596095 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 3b556e30b5..e5124ad398 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02dbf84cbff91b8b9ace1e1d8489d19c0ad1f64268c2fcbdf5d301366e62f45f -size 704262 +oid sha256:cd362fa69c66ef5cdeafc7db9ebf40964f0b3c77f1851bd216a2350020d0033e +size 704362 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index aaeced5640..b45b92ef38 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:452d291819cfd8b185a880ecc63f5bdbb2e04079387d32e66aa1120715d4deb4 -size 618908 +oid sha256:1fcd9b8449c7afacba1239585a21bb2d74cca8acc3ac361cb43c993661b46eb0 +size 619006 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 8bf9e469cf..0349b5dca2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:301249417048e9c776c82ebc3cc376408bed21d18e7216ee3f379c0fbac7a4f4 -size 690746 +oid sha256:d3e5f2907fab238ed833aa0c440a680c3b6f902102ed82fe411d64d14a599145 +size 690844 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index eff2ade18e..13b7ff1d36 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:085fbf3445426a2b7d4cc3797e6817bd2f761b071ac993ed93c20796dd444530 -size 605389 +oid sha256:e71d746fecb602f4a854c7933ef6b7491467fa2e4ef116a822e0a729c1d54eaa +size 605489 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 0ef741edf7..dabb5f4747 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:afbcde2aac05b84e862feeeac9fe083797635f5d58339813a44aef3596a3a955 -size 705692 +oid sha256:deb18d152a292bef92ea984243362ed298311e5402a09edc9d022622999feecc +size 705790 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index ef2105d5c6..ffa1dbcf35 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c13063ec988542972c678bab23b1ee4c52389d3ecf0630e47de050ec9f4cee1 -size 622014 +oid sha256:bf4cc574b012572c80b3d766336bfa5e982779d28e71ea075dc9c0b3252a71f7 +size 622112 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index fc7de85752..06ddef404e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9210084a982482630263513ece4296b004d67a53c233a4b4ec68bf4728295bb1 -size 692174 +oid sha256:5dea04934ea5164764cc0e617161731f0a639ff4b13446bff61303c68518c4e9 +size 692272 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 42dcf10612..862383d3d6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2da0a3e7cc1a939b2089a15ed19766dc96ebee9f92f95b3814f85b44c3d2d698 -size 608495 +oid sha256:1dbe32c76a5cb7aa707a2cb49c5677700a95172826c945c5a9993d7ac3e6c233 +size 608595 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 17f0a01c52..e3132b0c45 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:af6cbd100f278d75c3c14bf18b6eafe1201cfe222b370c75e58cfee4a166abf7 -size 776420 +oid sha256:0818e9541a750cd5fdb49ff85e7bebecbdddb74e0c8f532ec14e5efa4b212536 +size 777308 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index ebfabb286b..760885a2b9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a65f6e81a7d60cc8edf551c19e811b29c8283d203f4ba801d2b7017d3f52005a -size 686822 +oid sha256:8de8ba8c473d14fd84e0c12fb6385383dd1caad8e0439f2dbf952e4376133afa +size 686922 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 5a9b7e3d63..c0a47f2146 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9a4e2c269bffeb476c02317098d6abd94e89085cc4b16eb8a906770365027833 -size 762952 +oid sha256:945b68f498af24b84ae12a4b2217f9eb3855b68a5cde64de2c967a30f3b56a60 +size 763050 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 582fb91207..43948d43db 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:259141a312b52803f354e1744c220bf45c1fe9b1d7c9a9987f54f656d9dd0314 -size 673306 +oid sha256:90119c55d6b2ac29bc5213436ca38a71f0f7fa5fa23acd8b87bd125309a2d859 +size 673404 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index a56386a90a..8d1968cc07 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8fc5c10d56e6dd1eedb090dce61e64c82d5bb34305a831ed1a2a00857ce38587 -size 610375 +oid sha256:48d92eaf358a3898dd391146fc0553042f8470e45e1c608f9fc26de0eb62b257 +size 610473 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index af8b89034b..5e64415435 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cbad63e559bb17288559d59b4054603f05c0cd689c988f960a53f7da4b1a70b6 -size 527733 +oid sha256:38d483a894735e56796b1fec040790a141cc709051b742d2765a5f665bbdb68e +size 527831 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 96b2ddb393..7de9c1b078 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa82aa0138c54dbbb4be7256bbd50ee4207a1cf9a918ed5a48a7f934f7eec083 -size 635412 +oid sha256:d33825d9e13513c055736e2755edf0123f134dc2c810820377b3c17b5b2287d5 +size 635510 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 51aa57e32c..395e54cc5d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d30bf33c79f4ac311cc1bc255f375dc459c9f7aa9d4314cde082f63fd54a55ca -size 550007 +oid sha256:e12611d813714f55ca613fc9c1b86e0b02c1998f6eb0e9fddda31860fe203fa5 +size 550895 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index d2a7932b8b..cf49f535fe 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa95f5d847e8362a74116aa18f20d7e6519144440d1201c84e7dee73b0efda1f -size 615207 +oid sha256:a9b505811941dafd3b829af42593c8b70dc138282b0ed4fa21543025475abe69 +size 616095 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index d0afc93f8b..8dc7f7e06e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0fad2cd736d5364c236922f62e88eda2d53f6eb56e7ad80828e8cf444bb1d80 -size 533799 +oid sha256:1169f9f0817325e26db9c9de6b3c35c55e7d63a6642c1ffaa1a643102f15270b +size 533897 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index b262fc71c1..543178682d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a87c00cfb1b3136db6125ee406573d91bda70d617937ddc472fbcde1ba979a1 -size 641082 +oid sha256:a2baa5a61d0bc5986aeb3304798e302762e722a47d08a2a35a773d6126566614 +size 641182 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index e4f1c74ce8..e05c5f8862 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2676518a63a5f5748116da313d69600b3367f69b917ba75fd70211b58e6430b7 -size 555875 +oid sha256:de3f2702fce7be192900d5d911671d741bc280001a0654c885e927ba3a214341 +size 556763 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index d7a8befc6e..2b67e57960 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2494e6c6fd908556f14f77a84ef703d28929856243ac525a48ad2f4de5bc4387 -size 677796 +oid sha256:591906617b540caca39bf07b58e9a41e42f65d3ce2fcd7da9cc1371e4174a42c +size 677896 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 1d622e138d..639808e656 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01cad8529e59c4c6761c0ce5f2b43ef999972da253f766f7b0b283c29310f92f -size 596535 +oid sha256:3c45c297220b8e885309f8ca34be9100df08c8e776eccab38a95ac23e410c406 +size 596635 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index b07e7a2e16..468c5ef8e2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:38d7f35851e2176b507d29f74fc9fa27005e938673837414d60cd02a22117fef -size 701846 +oid sha256:03d63a5a8a2513bf3c5474536c5230027d47f8af6854495e6fca36bf0637a8d1 +size 702734 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 045a930e93..d349f1ecf3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:777145f8395b22ed98a813b2b5df37b4dd9445b6ddd85cfad44353dbf67fdfb8 -size 622312 +oid sha256:5e17e612c35d1191cba826d0a6504c6983508a5a2a479d075884d4693ab8b2ae +size 622412 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index c9e17c5b36..cddd38639c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1819a590d4739c2f2126029225dcfd0bf342cff15eef14901882286c8139b622 -size 696858 +oid sha256:cd989a1000109cc839d1ed060163beb50d8562917bae56f59c7cf964c1ebd4d5 +size 696958 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 8adb25e8a8..3fa59fbc8c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:857c1095cbb0f362b862528c99d600beabe85387d55d0ee3d544b8a92831ad0b -size 609925 +oid sha256:8bbefb9a566c0052f6b72242a8d54ff6ff443ce40b03e16256f5e3354e80bdd9 +size 610023 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 958f1dd162..1838d0669d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:201e0ce71678d6f05857dbc699683f343f2fdb8f1e4e24fa4b6666cf9e540259 -size 720710 +oid sha256:6a9c8aeb699e8a36afc48b9631eca335b7952f88262a41bf29a478f0ae219ce2 +size 720810 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index fde0b766f0..ae35256170 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:507fd159e7069537197e05107c61b49782707a561c23d890ccbf8d249db64d30 -size 632198 +oid sha256:02e495e8a66aee2b1fa1cc9ae6a8545ee533a413645f1d8b7a18e1ecf22cab52 +size 633086 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index a894c23a0c..d2bfd2ad8e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d13af314b319ee8948996d47cac8dc4abf6b1bc6deb770a85eee2bdcb3382b56 -size 689706 +oid sha256:b4a46a43c15ecf09b162170bed905afb1d7f6b0e71821527b850e19a5b538ffb +size 689804 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 6b1612df3b..95aafc1dba 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f708e59b1b345b4604cf4e842dbeb6f5125e10ca1556d9c0a66c666b2e737ae1 -size 602771 +oid sha256:a7800f6945cd6eb76c36d6011f623b227af1bb8259298c5974e36a2bbb3a98e9 +size 602869 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index f6d794040f..3bf177f59c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:92b68168c96a9624df85a463653a247004b96dae04787583a7a91e7dc625ca3a -size 713558 +oid sha256:1c08b9a780a5553045235505c878a2e82691319651636160d7e935a2c1852a3b +size 713656 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 9ee1eb1bcb..ba9eaddeb2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:84c9dc30c5aabc5a8910c34074ed54417687122fe68850a66a0c22ec9e3ee390 -size 625046 +oid sha256:b18eb6b1528b32692ea643395626b60b0b868a5ffafd0d6c755625b52ead1889 +size 625934 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index e5fb695288..14b13eeca5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3543d5ad006317b1250e18dd3c8ddf49d6fdf1e6eecb4df3c2ead11a30eb39d6 -size 698534 +oid sha256:e53ffdbe8faf228109605f9a4b6d0a2b5c7576bfd66e0d1bb2929a918c7a1f69 +size 698632 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 38f13a7ab0..a939541f7d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c260d10f5ffb2b288bd2e6318344943bb45d920f24b5af13fc1014796611ee2b -size 614609 +oid sha256:972e5da1612d47408fa9cc5abfa658c8b294d3dcce3105ed1a9ab758cd8ab3bd +size 614707 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 1ae666bbd7..37b032ac74 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:332d8568249b987407b5a975be4f35999434d9919b8faac0307edc3a5706af61 -size 724310 +oid sha256:87dab7583e89f9a15c4824ec26a8f8f96df11d76f71dd022ab49e5e2d6e2a5fd +size 724408 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index eff745b0dc..d42ced4b90 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c49be396103c116fc6a4365257591e4d1494bea95fdc55bd2066dd7cfa378cb2 -size 638512 +oid sha256:e0b080d5d9e963f426d85a7e74873852c8e07c6142aa664fb53fc5fecff9aaf9 +size 638610 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index ea02f28e46..89cc095391 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:da3a2792326874618f744412f8ab4be093c470401894ffdf3c69df1e76199f10 -size 691380 +oid sha256:aad7979d284673dfc4497d2130d1746954e389da455e441ea93185b952d284f6 +size 691480 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index a44b711a11..5791f3fea3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3adc822144ce71d1e6530b850a8ac11bd60769b3b1cb63376d4468c73f04610f -size 607455 +oid sha256:68264262b4896e74f6f9448ff61b46717d25356ae8712a63bb5a52e80d90fb61 +size 607555 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index e84e551d7d..56704231d4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3791ad9c61a386b4a25d2cc6d5cdfeac3c19986c82b4c36e668e079962107e47 -size 717156 +oid sha256:00cce53a0d7041ef96935c97cb0f0c70d01fe8bc37c13770449538f2efc5c75d +size 717256 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 719a823deb..0e0c347e6b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:451e43b3b3b06bf7040c147ace3279a3f74aa4c7ece7b16c25686168d7f3dd56 -size 631358 +oid sha256:0b8ea2f57b9614f8724aaef5885482195e919b35ab827663b8b59908e7a56037 +size 631456 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 3cc9a134bf..e72f10b19d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:044ed52bdfde4be4547609a5457bec7a9bb271cd50e1e6fa320e96d716ee4903 -size 765858 +oid sha256:81dde7c1053afab4bc16c61d281351288aeaa7577a5a35befa4d1e8e89c596b9 +size 765958 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index a9058680cd..4c9ebb5d3c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e2d07037916696a038a9021f8278163d49299f5d219010818d8fe92bd93a6a65 -size 680454 +oid sha256:136b74fef0e8fc98d896df5c3ae771c0ef96682df5b41101c4d54dce1e396bf5 +size 680554 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index de5e0794cc..97a3644af8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:603f0b8f9dd3f44f644358fe25baa5d45be51d115cb449b7b50cbf2d550b73f2 -size 790944 +oid sha256:3d4c08429de39524805d1226af2177103dd58daca426435e147d84aeb41d911e +size 791832 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index da1e057992..391fef1e19 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6eddf19db2c2443d315b7ac87ab7058e76df02805f4cbb9ebbe41aca914a1517 -size 702778 +oid sha256:1eec57c0e8924291c5c56160c08efb16de4b20ee7d0357adbe04b2c427d74223 +size 703666 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 26fd37caf7..b90e394ca0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:76e400e08346202d6dc60a8e14e8b244be1d204fccf2202d7bfb66a71031bb57 -size 758706 +oid sha256:a1ff30ceff6b151b5d2fb8020b5074cba6a1ce3ca29c83ded5ecdf3b651bc0b3 +size 758804 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 6bb561a1f8..89de436dc3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e93fd7bf36229d12ba450262dc059337b0c41500b020b967cc4178733fbf06e -size 673252 +oid sha256:8670b050e16648f7f262c738f1848730489df81891b1b35cc970ccd6164bb9c1 +size 673350 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 69f806d45c..9749c0101f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2978b990ad938dcf6ebba22959923c93c1874aea2f49fe0ff2d3f6f30733bdcb -size 783840 +oid sha256:08ee437e97ab3e149be129a248860649fe934371e06cd138c232de39c326661d +size 784728 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index ed6a713cbd..cc7115107d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:758ba62987477ce6232b5d3dd417765770797108245aee8ed766ca6d8c2b6c3d -size 695624 +oid sha256:e4fa1ee40adce29a1390c8265dd1c95702a2d0fa92bcc24194e79b67a9b64b27 +size 696512 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index a2f4e331f2..9f2fa02359 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:72f4e293147e505b1ecb28c9ec8198639e7fedbe24ae30696a6c8e667bc2c572 -size 646506 +oid sha256:5be53b102f16e21eece1cacaee78f08da97492d52bfdf3fc69153e12af368fe4 +size 646604 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 13a35bf205..aec75a66c6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9dac9cfac4f1fa4d7f894775eaf86c9f8fe93628650c1c04fbab3a482cb10d21 -size 560163 +oid sha256:cf8088b3744ab1c4290de03b7258246412991ae8cd2b37242ed4ebe54640a2e8 +size 560261 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 94cb128762..4c05ebdd68 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b0a443d3e30be3db5984faffa3c448e94027b420050c2b90cc8f9e2a43552ac -size 654988 +oid sha256:5a2d1a1af93faf2fbe5b6c8c9b235d96723d7e578bc401829613fbc28a9a8c35 +size 655088 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index fbffadc86a..9ab15a7cb8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:438c3f51bd8fa93043e709f9885c7711300230913cef4b7c7dcbcb6286c4efa6 -size 571557 +oid sha256:6f57e43d868115fd36afad5f38ba59b40b9f1af2193ff291bd15ba27a84e3744 +size 571655 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index c73454312d..07099c9352 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:48e918fa161959164c1f7da378551ff701970da082063ca754bfa4fb545c7219 -size 737478 +oid sha256:7084a718a3e70274fb26865522756eaf136d2a45868297bcd0838bdc0145c2fa +size 737576 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 62cfce72ce..24133b854c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6091dc566a7b6df076a71db75b1b20ace896b74be7eb9c46409f2c6a81e6b190 -size 646696 +oid sha256:17179e706f6a0ef2a13e151678059eb16309e872a8a2b20acbb5c706a70578fb +size 646796 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 35702ddf85..61f5f9b74c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:05e848fe97455e4ac3c8ac377187ceeac8d354d9a568e6843386111eeaf5a2c1 -size 727956 +oid sha256:52748dae4d6abd0b0df0996ab26ebcc4ba840a50a4cd53b7bf205c93495d0b06 +size 728056 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index e5f6a87dd0..86c835f791 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c01e65c71f8b8feb6d93262d8d74992e6348794998708f5c539bff3d277a3a6c -size 636386 +oid sha256:1cc5b822115155b01a32d6f903a6456395249d37ef6d4ce83ebc6a9a80bc8a82 +size 636484 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index dd5ab1d959..64a5d99257 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae51ec9b7a3b80670bfc05155cc3be29257f71f2a5108d97f3f5165e610469b4 -size 744136 +oid sha256:be4409fe2b6c307bf4ae17fe64b8db28a70f07242ff0d4defc1f45508aadf69b +size 744234 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index a56fbb034e..c32a92fe19 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eccd935a841b167baa99eec537175e3d6c8131de9284b491b40e4bdd0f93aa91 -size 662382 +oid sha256:ad68a8ba82c60782ce6d0739c4b911a0018e4c08cca2f599e96472c9f875bc80 +size 663270 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 66be57ffcf..52e51d23e9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6c893563c0c02615ccd8148ead3c3284d48faabda65895d81d5c6499869b6c0a -size 733332 +oid sha256:09b9d93f0e39b55644f69ef59596176856094fa1fdb0c973b2e2843d062752b6 +size 734220 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 21b82bd3f1..01b6991fc4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4d425314105bf0fb4cbdd64f7269a38840b4d9d2d8fc04dbc9f2c8fc9a674feb -size 649210 +oid sha256:224754e715ce57528629e84a72756a92999f22d44d2dbf3da3aa44b238722d23 +size 650098 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 492e66e769..f1290db109 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fda9302b583d88864d9e2eb9e0ebe450258dfc8d6f842201c5df1b082b825aa9 -size 647298 +oid sha256:4b065ab2ee06534f0c49a72a6da779a075933c8e8147d6630ca42e58c84b92c9 +size 648186 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 83c7b33435..301b8b4d59 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c834eeebfdd463383b3fc29b53eed690011aca9fd4ff03b2d65ca0ba9f4bf7a -size 560167 +oid sha256:7f0a923eacecf86f8d2cbdaed729c5265acd4bd0b141dd4f32098e129ae81ffe +size 561055 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 68eaf0ac50..5969353b48 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:abb3a167d21a8e58283da9a804a05d714d6ce527999cbfc182d7aa1dcc75c734 -size 655782 +oid sha256:37c4b455a2cd8f7da806242a867419bb3f437e878bb15302e9e3781937857419 +size 655880 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index f8c9207d0f..2c6ace684a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:69fe29df7de2f4ea3456922364ebfac6ac1a21289226b54d6beec1eb4403b143 -size 572351 +oid sha256:6cc62f9f8ae47acd0c3c81dfcd1181f38b3d216908961a11c339cca377ae7e2c +size 572449 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 306a45d5cb..1be74a2c04 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0cfd88e25685278663c6248722665c4b9ad4f5f0ed6944d253240bb4e377a461 -size 639774 +oid sha256:7603528a7525bd3aef2cad729bf84b53b6ceea4e5fbf93a887f7a12343237aa6 +size 639872 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 6c52cd6798..36f6c5f496 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b17979004478249e305ad0f61d83a6987a0ab87c48f2607b8f8ba4f84d477c9 -size 554319 +oid sha256:ad300bff0ebecbccc46f5938e573fc4e18aab7b0e3d2959219d264d5976f0629 +size 554419 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 86b234a354..f5d8bc3641 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a528a148a266b82b8df255e98d99df8e4cdd006e492af2ab1b4358994bea0a0d -size 664860 +oid sha256:43777b52da50e5471d22d323dfabb9b154841029993a29a3d64d7c53967e333d +size 664958 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index dd8fe6276f..d6f598e040 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6bf2e9327c7301957c160cbd833ecc7deb3e9f84c36372bd88589e7aaf8d290 -size 576543 +oid sha256:137e6b3a21d8f05cc9a32c174e921d49badd4dfe6ffc49a8276fd4e4f3885092 +size 576643 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 84f0ae5e2d..6b5125298c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6948f15bb4ed5f42ef3bd27b7e4d00e8ca2333c23287b3e6e490391b3dc3cc85 -size 646482 +oid sha256:ee166e50cb262796b0bedcb984824570f4d81a200297cbd4bebf41e48d40ca72 +size 646580 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index d1c05c5417..a09d7ceec2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:53fea90b52cd1a1af33c2d504d810c49410aa60cf1e038e9b0004e51759d3e4a -size 560337 +oid sha256:d4eeba58c3bcbeb8c31f3a5f9ef3518f404561fead9679938c5d45851ff371cf +size 561225 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 8404dde3f3..b6974bc54e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e4904352ca07f8d2544d79c69cf1add8bb19bce2141c94d26b7e007f3c33ef8e -size 672356 +oid sha256:dd846c32a3b100e7cbd97035fbdbb0914503e400a7c81ca8d041b7e62c134726 +size 672454 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 9bfc773c05..8fcdfb1dae 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6af81bfb7eb254cd88e74bd339b7f85bcc41608ae38c79e441b3b0ad1de3e2dc -size 583991 +oid sha256:f4047601ea0a02102cbd6a5b14637f0bd547d1a526145515285284b93a0e6173 +size 584089 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 61d848274c..5d24a750f2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e38baaae4b26b05c13e889817842c23bd39fd0c9c4b035e1081ba63acaa7bf9 -size 707244 +oid sha256:6498c3c4f6c0bbe915db89e5054f4a5f2084b671f06c7e4e1b6837ab8676b637 +size 707344 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 6c7d34f985..39a7f0b31e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d26c4cb556523a7124827529033333ed45e3f74d112253d23a6e92df5b839ab1 -size 623024 +oid sha256:b99fdb0806881e55fa0cf0a573fef7d8ef8dd375b171e7932de659dfb258d8a3 +size 623124 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index f59f934205..4419d71c67 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:38877ba1ab9c3110f340ee3332705ee7c9902208fd16e77952af9b96b31c9b9a -size 732084 +oid sha256:465419b6f03b448f53be4566018396c0ed26db7f75342bb0966d3051a5cd9a88 +size 732182 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 3411cc342d..9d6f0aff00 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7610c8fbeab89e5156bf212b3d980ea91491df8db9742b84fd1bcbb248a4c8ec -size 647518 +oid sha256:16093096f580fbb11566dc61e6e229f0bf4eca480ccfdf09cc6fa2f24908adb4 +size 647616 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 02102545b5..7f68ff4236 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bbe15b9fe5d2cecda26a523a0865c60373423dc3765bb022dab550469fb22da7 -size 726306 +oid sha256:bade2e7d87b4b6c6d70015bce0a9d5cc51e8cdc2a2a5a5225cb8ff5ccda25ae1 +size 726406 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 0a772218c8..5ae73b23c1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ce5bb57118ef0ad12a30328512ae4a4eadeb500d84b23e181553f26182bdb396 -size 641298 +oid sha256:4346662fb0dd674282aa427e2d0747ef4d455164f2e34f49e01b1a6a8506fda4 +size 641396 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 18cb638018..28df61e311 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a8a2f278843b4d34469e6042439bcd4f46ebc21498d9588e44f25cb18fff35d2 -size 750948 +oid sha256:47dd3b7202d4a276e505e2cf7dea15257b5b9b70ca8933b68da038c147276a33 +size 751046 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 03de6051c1..8d6a5b9fda 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1ff00d8585bf134d8351149e5fcc4e218a1192e8198411f391bac498cd32fd8 -size 658736 +oid sha256:c5b53cf8eb54c39961031cde7332e45e3f9f1a9da4e37dc66af3c0fb0212f541 +size 659624 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 6fba79d168..7d4c075488 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b8ea96f80cc7b2e43ee832ecd4f1791529662bee20bb3ebb75685093461cb1e9 -size 719154 +oid sha256:8c11870fec867756ea9a9e530871f96db0f9211acc2ba974905aec371027de11 +size 719252 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 4393f80a23..a57e3d8254 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b38a439f02de29952454057587d82894b96f4266876b55c74ee1a2cece053e2 -size 634144 +oid sha256:9c41e6d147d22fa786248bdb4591451c47c9b2c4720ee4fa52514b557b4760ec +size 634242 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 5d8940289f..a6ff636a18 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2db9b30387955590247ae5688f5b23f1d28ce4040fd1868d052de9c9370580a0 -size 743794 +oid sha256:425dd2c2aa39efa509e915cce07cc790fe241d552ddc6e89e2a020a56233ad3b +size 743894 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index ca7b5a8306..93a9640ae6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be2ffc8227def9cfcd34edc2b6d179e0b66694bbb2e40363fc162fb14e3cde76 -size 651534 +oid sha256:6f833afeb90dbfb95ab163272235ffa836ec19505640000c7ab0837a8a8cd75a +size 651632 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 4c935a780e..9286607bc5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6c55f9bf202b9824ac182745f1ea9775c55fb9fa04750b9990d9400cabad963e -size 729808 +oid sha256:2af3634ccf824d3bcfb8cd956c2a729fc1080899fa1d4badaa750ef45d0560dc +size 729906 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index a3a63dfb81..e614d4be7d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79e5e88f37279bdc041b9f4f48fc1ff6af7b501c71baf10157b48bb1352f4e5f -size 645736 +oid sha256:4549711bbf704fbdc9eaf7dd0d0b607c07ecdd1f479818da3be7a15366f2ab91 +size 646624 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 86cf815d4a..b841e13010 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b2ba28f659f703ed0ff82b30e415ce8d8397e8c303f044a316fa79a8db21108 -size 755584 +oid sha256:3d54a14b1d7dab6b0777d35e688f24fd8853545431fcd03fc746b9ddf5c575d6 +size 755682 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index a9888c9d8f..afde5d13cd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:063fb4341830f4f26ce1d97737b358d8b734789ad55cc3595b4037c700e84e84 -size 664210 +oid sha256:102cedcbf81cbfeac263db7a97b40c2f87cfbae9732268a0909e07c9ba128271 +size 665098 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 9dee9f2e2c..59b9ea6625 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:38cedb081bb51b9883dd8e4290752a99561e1575833e55406334fcd508476799 -size 722654 +oid sha256:61384d004b170afff410df37e7132847c92e4ee7bd1eec2f630a8958454c3077 +size 722752 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index ea1d571504..9b8e34623f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f10ece912c7be0418c1eae8207b200b3914d687aed878cd3c2acd9784897fbfa -size 638582 +oid sha256:0c9d171a2e99da20d8c4c03fbd6f368ae0346b50bca329b2c07c3a1711a34a93 +size 639470 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 4162210a31..b9215658bb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fcd4084b1e97e26c71b094e1d3db4c603a8b7e92c38de288f09e4ddb1e004518 -size 748430 +oid sha256:e4828907f6768c2b4726a56dc3c7ff52ddb2133581713d64dfecf93f4667e526 +size 748528 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index bc09f57fb4..d131a61b46 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b60b8f945ab1d526ac45fe0c37f7f019b667d6ccf8d82cc3fbbde72e147e253 -size 657008 +oid sha256:183edfb39856b3922df7aa267d47b505750d3d0b8d2b6677f63d6a4088588be5 +size 657156 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 43c222e97f..1fa8ccab3c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a688fc651b44459e066cf43693bca74492fcbb1288fe09f0487edc8ff7c17a57 -size 795306 +oid sha256:236161dff55e8c1a74679015d96fb755376cbb4facca369a1f07173f8a3fcf6e +size 795406 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index d2bdaa5689..7cd7b0888f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:da6f3c24e513c95334ada83c7d920b92d35091c8fc9badf94983aca3466ea321 -size 705512 +oid sha256:9dd31de6d67ebd49d074f733523a8ef205f5e339db037f76ffef173683dabeb3 +size 705610 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 60775d2c4e..2e2a8334c5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4f8b97cdb16274a9225577fa388ee7e073270b96a12749458a0a158fc237fda6 -size 821182 +oid sha256:c99a110f161966e1b941378e9df8f60794efdb7dd957a754220855f7b39710e6 +size 821280 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index c6381ddb2a..2dcd8db6e4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d427d4abf0b461f5283df91444a05f11086deccfbc70d8697e6814da33937532 -size 728920 +oid sha256:89ff33abd38190abc9cab4040a7eeec077a99d0e1c45d4afdecc34cceafa707a +size 729018 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 80321231fd..b13cd1a954 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:478f11d4e28ecde2e3e76092599c3a8f4008123f7623aad189d70690c0580925 -size 788154 +oid sha256:3985094847bc18f1d0ca263bd6f6a18ebf6c1c649bee1868dae325a3232275ac +size 788252 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 3a7d981eb8..9ca3a0e800 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0523b404189dd8d11d942e8edc8b734f245ba975927be88066a6d3d23c40b8c8 -size 698358 +oid sha256:95ce07eb73c7709a4dc690c5df5988205961b912ed1f65602001c1d3007bf898 +size 698458 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 6ae29d406f..0e48c9c1c8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:91a042c5d5ee75d0387b35f02097d360274695c09105818edd14590e509d865d -size 814028 +oid sha256:a007d172dc88f7194e0e51347d9d94e846346c5842ee34b7859940964ad60837 +size 814176 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 4716772ff3..ac3c13be7d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d7152bcb54aac03cc54193f5973e7f055600be58b43197012f7b14e5712f61bb -size 721766 +oid sha256:921059c435446c106b9bb945e55e9b704a7e91474055e666e0af070927419835 +size 721866 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index f55dbb3a38..4c4f096b5b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:73faa7c1c56ab336aa2b74d58ea4a744b8274f6b0b3e3dec302756e9caebec18 -size 626208 +oid sha256:e2a939979d2cb6ed361a210f5ebb748a3dc9dc000b28a483b6a69dca6b2fa845 +size 627096 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 42315f1f5d..00917880ab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:916deabe3caf83aa3e70a11f063c3ac259456e5130848e82526fcf69790d6268 -size 538977 +oid sha256:219e553771500d3d1e6d9f0c0a39b92a2e63fed3a53ca054dce9adc9bb0f0f49 +size 539075 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index e0308e727b..83dce2cc29 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6932bf47b94f7048ae61e21fbf99664e1c6328e56fa858e0effc14bc30514a32 -size 626206 +oid sha256:7f479c33b1128c91c7f4d1ed0dafd160a5bcf2b978add661a2ea52434ec344ff +size 627094 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 2b53e47e70..3e8430828c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c96830caac30ad99435a486434880e69b2a3e84444ba5cc8362ddaceea969a4c -size 542281 +oid sha256:22e6abbce712112323c341f492908ecae15ca3d008ea5698337d4b4984fbf7d8 +size 542379 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index cdfdb09d94..8eb41026ac 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:559aa09d0b8b67f62c60def2d52626709c19eee3b88c8748e46ba14d0ef209b6 -size 691260 +oid sha256:e368d7890e405bf08e10119ec60c857aed960f9c592e1e2abbe93a089a09ba36 +size 691360 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index efb2d19d82..548e042694 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5a009ecafd8610e346f33db3c68950bea325dd8968b3eebb9dcea58d9de201c3 -size 605659 +oid sha256:e6b84508d83a6bbf654c6493512ef6f174fd4ecf73fb817ae839a36fe319109b +size 605757 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 91a78c0012..a50f0f8379 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d3cd09679606ab705fdbe96fa98a2b3f10474d732d960be44a3d5030694b4c03 -size 714714 +oid sha256:933a2d33bfe8429db0460a5e70417c643fa0ab26f2d0e7b450cacdc35acc9529 +size 714812 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index a1eb6b59f3..2a41767d0a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cfce0bfd211c909e9e9a18fdffd8d05f659b7037ac88bcafb63f528991a5323b -size 628570 +oid sha256:fdeffafdc3f18701c6a06342128f05317569da0a469d7ba031e06f827086ab70 +size 628668 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 898819db5d..69924e1adb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d0dbec1e26f22cec9c5921257a7e57cd7fe84f64c651b6a824c45b1ef43d96b -size 701196 +oid sha256:e72c0286d01b01176330622e9b46a03d995e1bff36d30f3c867692b3a760773a +size 701294 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index f95ca2b9fc..e85207bfe6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4724c829d9b8c38e8b9b7b723aa28967b9be98aafde8bca7a7c320248c4b66a2 -size 615051 +oid sha256:e92b7e7b8541d583651e7b7c0d607a05bacd6c8ce9f4f7fb3e27a094fc9856b0 +size 615149 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 410d268838..1f44651250 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bc53f9ec34fa350e7e0fed8f59cd292b30d2eeab67e155b16ce9ea53452b1fa2 -size 715352 +oid sha256:921ee01b2829c1b02e0aca7b04f70e338b1102549b88993e45b3bc39e4427f72 +size 715452 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 86b89b21a3..9b4b2f0886 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:250172c9063650ef883d53f64374cec6a293707f030d365ec8fa7edc3b3d903c -size 631676 +oid sha256:64abfc1cb242971b7fb08e3998e806632cc2ad850dddf56a3628f537d6a378de +size 631774 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index b19b4a800b..faaaaa1181 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f0a5248309c710425a2a1f441f373a4077ead515f1a71b56d2b70cdad6731c65 -size 701836 +oid sha256:5f70e88e793f1a9da79b001bc5e76dfcde112c51296c6340b999d6ff064fbc2b +size 701934 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 0554785a35..5ddaa3f97c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ddfd4bd6ed317f08182190f835019be8a4229e5d51ee2edff243ef21b545e232 -size 618948 +oid sha256:9a4188cebdcbbe9a16d43c884beef11439339c932788630382bdcc73129826a9 +size 619046 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index e8e7600fcf..08584f6eba 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6def6c470415b1d3ae9fbed8651b4d6112864f461a6b35e1320906c54fc9121d -size 786870 +oid sha256:e69b18c6e7673b802cc8cb5f076a74bbb2e131eeae1ce5118dfa8dffacf58514 +size 786970 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index bb6878779b..f82dd5161c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:181b089ab7d906da5b26b88628e0e9d64826d3b2e5dd11b6f7ffcf11758ba55f -size 696434 +oid sha256:c1c33ee376ed9da235379c18a957fb02e2849ad348424feec6529970cad0765e +size 696582 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 232a57e77a..8e8241692e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58be39b8c940328901a3f431c1025aa1fe1eafd7b14dad371f25afad8187efb0 -size 773402 +oid sha256:69cb0da37d0481b0998e87e9ea3e74a08f914c8d3210a7240b852a62cdbc497f +size 773502 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 0c625dbc3d..33533319a5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c29c8cfe5ecd26a0bd9662d13d871f1c7c28bbfd54a2763a0ef7ceb52a93b664 -size 682966 +oid sha256:7ab63babb44df17ee674e92132dc8d8b505e8efe7681a6c103db5a44bb32585e +size 683066 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 13170b7b31..fb540f018d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6d0f3fe198e901e0fc88ed57292e502e89dd6a73ae174af287ef4e739bdd2d6 -size 661526 +oid sha256:8035ddfe15168618c3b0464479f947a600bc2665689c2944462899f1273a7bcf +size 661624 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 5b93b1e9f3..a4415e33fd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fc22258ed639f31b2f60fd3348c0531356658eee83cafe28b93fb08503a35da8 -size 533645 +oid sha256:87616099663b4a3c4b377f562c93929922bdfdcec50a17c3268be19e507abba5 +size 533743 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 53dafb2b64..e560bfba8d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a1ead7c74cec4d46c71697f3cc0f01ea68e5afa16a2aef9b4c2aa88518dea766 -size 685674 +oid sha256:0e8204db18b4847c6201b0a78bc3324c2cde1e1356dcdaa604c2f62b455a83a0 +size 685772 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index c850d21b67..f12b6da2b8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:98a60d6d57a3bf8606f23dcbf8141dda8ab5317165f79d2d93732d4c5792b2b6 -size 556757 +oid sha256:170e476212b5978be7f86c078dac9c3498ccb7f331246c76b9d6c20f448b9be7 +size 556855 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 935e7b149c..e0624cc84a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1c45b6b6ea1b2ea10c9aa522836d3e849bb5930988c65e4ca9743cf6f588d48 -size 667246 +oid sha256:3ecd09c87342fc9eae5888b05c56cd29e2957ab3c2e3b8f21ebf08c4ec265107 +size 668134 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 4e8555348b..00269cbfaf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:42bc33f09f582dee0d9b5da58306c64a16ba65669e70efba5812f42000f4d25c -size 540501 +oid sha256:fcc0c6797da1175c7015e7b8a42e3cbabbc43c3312ec6f3b6581cca3b9d7d7f8 +size 540599 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 731076cca8..e87303eecf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:17987d0d40a7e562a8f24f10f748f0c37ec456d81cdc6ea345df14c086b07be1 -size 692382 +oid sha256:4a6c07092d97900908e131dc5584cca1423ca6fe2a326c2160a06dcfec95e245 +size 692480 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 46bcbc70c8..1efdaccd50 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:180375c77bb1d082ce2bbb97ce9bb4bf40fa7e4ca24da602936a4fe9b54a2a12 -size 563415 +oid sha256:9237f75e259ce4c2c9ca40d6db223a238ba3281b095b3b1c35e2f6aa7336bc3c +size 563513 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 60495b0fcc..85acb6c70d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f6494fe880b1140fa3d489b37ea6c1ceae40800d27500687bf48c53628d5e2b1 -size 729786 +oid sha256:73c36c773c5139b6a9e1e7e5822aee16919d1921cffa94b14008b4b0a7b1a4f7 +size 729884 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 6216670ef9..cf2684e8bd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2fba3b3e1ba86e1ce457d286c711d3f21b61a15ef61ff9fa5ed693ef14d141ce -size 602447 +oid sha256:127e8053d3bce56170ad6324d666723c4e67d938eabbd201f18bd1a11afee014 +size 602547 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index a044fb7b76..a931f0295c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:455cc09542bcd0533dfa2292f340c613f2d7688be021dfee76a31f6a27e2da77 -size 753688 +oid sha256:d7399f6efee5c689ca53a7188f09f2f87d2fa0e7daa3929addc6cf699539938f +size 753786 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 5213b6afaf..2524341b80 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3bc90ab9223cef9bb90fbffc9f6f5823f736578f679a6026c03fcbf2d82fcb4c -size 628224 +oid sha256:75f9530565b69eb9625fb4ac6121c477ecc4d30775f02dfcfecab36dbdf598e2 +size 628372 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 6d63cbd8b3..87e904eaed 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:49bdbeb5428ec9e6a26b3b58b5296f16224ab734bab5d2891b0dbcebf0d98215 -size 747862 +oid sha256:54eaed11ff79421a2827ab525d48246c4fce0e695ccf3c3dd193f2d44ccdd93b +size 747960 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 80fb91d620..d9e59f372f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:98a412bac573b01b44248e88cfd2b2d2e809e0e4240172b498afe9d6d7c9bbad -size 616625 +oid sha256:22da57011780c4d6c4edfd3d9d637b15a0a95361622f3a9f27fc2c39d3472e80 +size 617513 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index c3a5cc30a1..0732134f66 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5dca556a84f174ded71701c8c450ab45e15b44231c2108f16c36075ae3ec9a64 -size 771614 +oid sha256:57285424637c77dc4bcc49f7a2c4abdb3bdca52867758b3d54612b82e65e636a +size 771714 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 78ebd556ab..6aec089596 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47c1549a0f38e9ccbb4babaefe309ce993a47abc3f449ce7bb93a2f8e729c51d -size 638900 +oid sha256:95cc81260e5bcc6e97abc15af7241d51b434049c67c5b893ead0b2d9f7f84492 +size 639788 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 8000aaa62b..97f9f13492 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5043115eb91ef72cd08aea8b8311db8dbd4a771ed1fd840bad928a7e9396d451 -size 740708 +oid sha256:d845585f8abcfe201bed15f9603f7af560b13341876bfb547396c957d61bf73c +size 740806 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 6d8c897a16..4f61a8d720 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df1ff773f515306f53b9d76d4db5d5e40f3e39614bffa9fc43baf34b873ed319 -size 609473 +oid sha256:f835b216d7a96f0f832aef9cc340cf7372ec2929a0f85fa9e6d79706c251e6bf +size 610361 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 15e34fd068..8b0d4b4b18 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de736d7ebc398e34300a8e49c2a6e8ffe2e9cdc043cdc58a1125a629e1d3059a -size 764462 +oid sha256:9b942c496bca4f03310a910304ca2ef7db323c75796b585955f7e45bcdeb9c62 +size 764560 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 8e1453e8e7..dd71f4c97b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3ff7eb71cf115a1c08f198acfa1866bf008a30881f890fe6548e8fddaca3f017 -size 631746 +oid sha256:cbc3ad1e00c02b6363f40728a53527d5468c46b1a9acbf6299848bc191cec24d +size 632634 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 4030060e4b..ced0ed2be8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:54e99445aa32a68d554b8af3d448cb9457b4a68c496b3a651aab4fdbcef4251a -size 751362 +oid sha256:0c61466e9d710bae8773c331b18bfa365d810e497f317d3e2891ac88a58f7f09 +size 751460 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 1001626d92..a289a8f1be 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:87c539e7a0dd98199c78277649aecfb5e7b0a12866fab8276fad566282d08233 -size 621312 +oid sha256:f3b114409497a3305f63c4f4fe2b15edf5b38f6069e2050532844b91c8d8a035 +size 621410 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index ecd1483da7..4edd53f8f2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02565e3b75228505a2c7377592599fea4cd17ed24df47f166655c4e3c7b4b8b8 -size 776102 +oid sha256:40f694fe649c27a0af92bca7df1c50462f4a21048174b78e0072c65024ae78cc +size 776200 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 580fa25962..248e167534 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:24d100cba8cc26d2898901e3f723f033a6f9e124af26410bfe19a2ba72cfebe2 -size 644424 +oid sha256:9e9a15400cca956352970437de2b439d9d92f4c871b06d6531eeeda8b153579f +size 644522 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index f3ad866c03..d002c18c57 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eff08b78f36ff2c2aef3dba190c2ffaa412bdc930de488f6e2bcf30182cce2c5 -size 744110 +oid sha256:b2d1e2f992d20319299a1546a53f446286a11008b960b112936bff5d134fec4b +size 744208 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index e11d59ab09..ac50d43544 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:00d536bb911b7663cb8e80ca50cd32900b5737dd9587f4e23c5afe518f694432 -size 614157 +oid sha256:8891e237aaffabad19edde42914640764660402de310cb3739b4fd7f1756fc75 +size 614255 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index c505313a5a..c8e014ff33 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6303a7955a4162f1d177d8eb648bb76985eca146131a77a8189d879c55e63e3 -size 768258 +oid sha256:22faa7f3190331e158bcbe7d732e7c5e1f2e9b5170566154b0d449733c545771 +size 769146 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 712e70bcd8..a31bdc8034 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f8c72d17ac5ef81120b0295490a29da2e93ce63f601d1c4eebd8ab1e6439e19c -size 637270 +oid sha256:0b93667edd03ae47c2f08763cf80b1e73d882d1b480b8acae241c41737a97c9f +size 637368 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 6a7eea13d5..8204635333 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dfe6086d61ff07d29997e64ab6217600b8639ce76275164b1f8bc6d138076a38 -size 817650 +oid sha256:c2a73b9e87f2da7f4760811d3a31e6c0bbf33d167419064979222e132ce88e6c +size 818588 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 034d8fc2a2..f6856e00d3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e404054250f0f6d36de07526859036550116ada4b8bc4d7f935401095a2c2b3 -size 685726 +oid sha256:56d43d20202ff78ebedec91151dac63da623417ffb10b2b43fc353abdaec68fc +size 686614 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index b922a71280..de8fefd647 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4e477c5b7adcff0d2bc193eae798276c343a4bb95df9ca65a50d96876f96f1eb -size 841946 +oid sha256:8c491c9c4b3d907584916b3ab1c1d6202abfcf7ad9b6b5e289bc68fe9f5cdab6 +size 842046 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 7c4587130f..c2133f9c97 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:335a1c2225acbfb7ecef247743f6f24d6bb7488d5fff85cd7748e161bda5ecb1 -size 710318 +oid sha256:dc5b8ec364464b2984bcbfd3beb854fb817b3ccab2c38882be0d75254cf76611 +size 710416 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 6f11c8018f..26fe71bb00 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:24cb6109d3a3568f6d96133542c201944dbec41f64301927f8d3ee8b9c1154eb -size 811336 +oid sha256:5a89df60c9dec1bcd84c7d25d4f730ed2940d44cde76ef87414b211f0ae87519 +size 811434 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index bdc9f80249..a29a038352 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:514b678e489db76f7c8c8cd0fdf17de2caa3fd384e19fadb4081a4fb67eea1d9 -size 678572 +oid sha256:62d8aab412cd308bf8656d374af033383a8c6110d89a31e7ba63ecb2addab56d +size 678670 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 53bdd48352..5fcefe5a39 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba1eed8dd3ccdbbb686958cfd60a1f5418965bdb53206b5cc094509e390a84aa -size 834794 +oid sha256:ff13d9f5a5fa808cff17b5efde5e714a855a219f77b1401cac808c8afd481408 +size 834892 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 12eddca0d1..54186e3a4a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:424e11f7a5a71851cda846418d33278f9cf67e1cb2ad5527acc33448118178c9 -size 703164 +oid sha256:ef437e2e3b2c4393b0365eab3f3984f1c49fc2283e57a0925da4655d1970e908 +size 703262 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 83e1bfd08a..e00d60f4c3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:74ea2cf12bee2d2668e2438634d689f85a2b04f064ca5065cfd020b5a0aa07a2 -size 642488 +oid sha256:0935e5fad4a9ff6a5ef00560db6c1b9c11e48a7bd0f8881f833104d2c00eed9d +size 642586 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 9c8048fe4c..207f6662f8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4039f8d64f67697e65103978356d3e55c238c10974b593245d7f2a37271c144a -size 561079 +oid sha256:5e6dc029406b6bb15d42528ba422cae3250c2c4cc6ff80d9cbbb31169805486a +size 561177 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index a1ea64c8c5..d171d5fac4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a4539a5f989c98450c0980cec77ec0f33ca7a9c2253e164cec25a415db48ce1 -size 667572 +oid sha256:5a89a21e88c727468306d9c24ba9cbbdd53b8a4bcf46bcb2a19ae669ff6281f6 +size 667672 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 375eb72015..96b3ee92dd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68c080e7840b244246a685f35a13412417f216b66c618ffde2445baf9f9f898f -size 582463 +oid sha256:60a1daf9c96d7db9e059db564ca379e42506a7e697aac15ee8cc586088f94358 +size 582563 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index cc499f21cf..03da38017c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc94998025ed6280c9f1d112d1112c751443ee6af591d074c1df075e0677c292 -size 648158 +oid sha256:d40b03dbc1af3dcc121b31d23b776b13a85720563b2f246ae7168232500b8085 +size 648258 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 3ab5dc31c1..ea3d99e274 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:936097b97327669275bfcbb44539b84d360eaaf8dfd1c5d1878d582cb40c7653 -size 567835 +oid sha256:6ffe291f4d88798567999ed8a0f8ec37bac54bb0644e5cb728d3090544396643 +size 567933 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 7c2c022a89..9537afccc3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:51c957a411db52723a0e2d5b8f3a701ac5236384f831845e7354328b832cbeff -size 674034 +oid sha256:924f26d4968b8143a60f0eca3e70b82ba2ef6283083786d60129cfa6194864c1 +size 674132 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 9e977f99ef..e72813af62 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8035f4ebdf488ea2657768b368607e64761198267ef9ab65f6e8918a6563d194 -size 590453 +oid sha256:d3a6d19857118a20ea9882ba4ad11535aaa6e52eb396176f4f0a1bfd5fff5333 +size 590553 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 171fa8e6e2..4aa6f636cb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e040f51fe4447d2afb12f1991a7fa0afa7bc983906d51b365e6852503546556b -size 709958 +oid sha256:f90f4bb9de9c3548a0a4519454231fa3bda108e34fe404e822f97bbc01db6a50 +size 710056 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 25f1e7b21b..15cc3eb4de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f66950990003a0559be0d0876fc18cc2a670bc4d1ae9f18a0e75d70a7a143c32 -size 629488 +oid sha256:a6992745fa063859608d117eac072c7e6c0f2c39d847e0459f0f7e2c76c5ca1a +size 629586 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 8a4b0a53c5..bf3f06d984 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:72a214d06407413204501825dabcec2a9f8cb3d10db5dd9ac8f1301c29038245 -size 734796 +oid sha256:03b05a0f62b5601a31f0065e5baaba9475e5b091bd9c9a02725946f3d52835b8 +size 734896 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index f7c4f18cf5..c4befbfb37 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b851b1c56cb55e31a75d1cf89b00d1c96a3eab117adb36857f916abdb42d253 -size 656052 +oid sha256:e5fc918e8e3ec965c4ebbb889637af3e746656f42cc3bb08b2360bf5c48fc744 +size 656940 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 75ce3399c6..dbd3d0bc34 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f64b608032c86c8e5a45c20d4c8235cf26d61dac47c8806b44b93cd3a61f294 -size 729020 +oid sha256:9fe417c1d486a9beb639ebfe343e4cd0a49d8d95539b2e7443d153f1c0d2d97e +size 729118 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index d46cb7d1eb..19f077fa14 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:311f357d541e47c05fd4169224dcff96455c1f213055788fe888a3a1fdb15496 -size 647168 +oid sha256:945c8e0898b51be299315c89f503b453f34caf21e44489205f31176ea6226492 +size 647316 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 76f299a9b8..36d10d2800 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e6a99f5180e9ebefede07a3dd29d444b42e802a4a6bc2a73b266a684a84701bd -size 753662 +oid sha256:50271ad709e055be39321df1dfcb56a1aa755ff95da4ecf46ab8d521ff1ca18d +size 753760 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 0e8100937d..477c2ef4ab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:919cb694de5aef5e9260ee653b78ce3d31ac7b5cbc5bc12d463ec942f9304748 -size 666382 +oid sha256:a6b0486878071fcbdaf3e3ada4c6d7e93e27fccf18433e4a23e69d166418c5e8 +size 666482 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index f44439bb76..20c1ec21f6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2c4b65cfc20e8fd44b8f61f43faf5b20c1d76f5fe8e0ccdc5acdc6382ea0317 -size 721866 +oid sha256:4ac32d42f5924cce5e5c2317af5cdff5a678d7cb25c6c4a888216a55ea2ed0f4 +size 721966 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 4a8e39d71d..c06211dd90 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f62bb41a718b67c64c49b955572183c367ea28aed05eb4ad352967a16db3bec -size 640064 +oid sha256:69cf118ffb1780a7067d291f4a6e41e9d1080aaf8d1b894f008b58dcf6a25c27 +size 640162 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 925b09b1fd..0b7339b3fe 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0d61fb0bc2f1f9c4e7cf202c689e1f800f6635ae3852eca1be7d139f602f4e20 -size 746508 +oid sha256:56e4e436c8875a81a9ec04205c88491b5baecf26d7166ac67f5da19a60bba8eb +size 746606 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 4393d4b7ba..2be5b97dfc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dcf7123df5ace745476b8edf8017aafe916e25589a65a26a05ad47e6666c052e -size 659230 +oid sha256:1bb423e15e99abc64107de66c7b357f0676f50a81e9fb69968fc676fbb121a20 +size 659328 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 49a575c089..50d9d30bd3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a1095c32e57a0194ceaa6646670b99f88ff72dcd257b3a48975e140d82e4cda -size 732274 +oid sha256:0bc3a8a74f5739e01a9497efaa6e8c9fa3800454004f9ef2389f96d96557be01 +size 732372 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 5b7f5af49b..4040489712 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f81025e8bf960934b359df1949e97d6f83bc00d33feaa270f2c4f8acaa7f25c0 -size 651606 +oid sha256:b658a1a88d55021ad51872166046e7a8d623e9b10059ff81e35706404e2a3b2e +size 651704 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 97002a0f14..1785e6df52 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0b2a9a0d824f83a6e72a41f8fbfc1f4bf452c46c2ac1aed8a9f0168e34f66758 -size 758050 +oid sha256:b786b1e6c94d5035d8210ea73833d9bf912c49c890e65d80b8dab72a9d436f85 +size 758148 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 5fd7003b5e..fe6d7a3164 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d35d06fba16ebdfbf51038487263d896dab0161e8798ed8c565d0eaf66f8a55b -size 670624 +oid sha256:66cddaddce929b58cf38d8e1cd90b855a652404eded9ab8ee738cd4aa4b90765 +size 671512 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 00b4e63b2b..121ba3702b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:33f5e845df739978765f7f8690714a745e1f70ab5ca9561680cc896a810f24e1 -size 725120 +oid sha256:b1425699fa9b75cc9169aa5707ab02a3eeb0af9330ee77505f16b224d9c84519 +size 725220 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index d5ff9c3291..3033f29286 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2eeddb62744275192d82efe0c2de76a39173cff875256c316f07ad0cd6e1f4b -size 644452 +oid sha256:88b547935991491c95081a5f437aa7f51c83b1f447936c05b8b9a856440e3386 +size 644552 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index ed84c96b46..2b6d3939a3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2237d75d3da06ef2f3f3c540752bc6cdc957ee7b21046bf59b1a775da1e52ee6 -size 750896 +oid sha256:88b02374ba1014a134c30f874a7fc3a810ec07f3f795cf192d2e05f682ce6a43 +size 750996 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 984aafdab0..38de5ba6a3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ea1521759cf3d16a4880fd5591e99bb216573e68ca1eaa8ec58f013331cdf938 -size 663470 +oid sha256:b5200759a5d96cd706f21f9dbb1f10c4c827edbf0e91ced3460838c12d3cf2a6 +size 664358 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 234b425543..87c89e54ff 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b8e9db7dbbd7309d8588ef6d5e05fd5c410895fbb18409bba9c71c630eefd98b -size 798020 +oid sha256:3a2844afc121da86172a8e7e4851d32d8400061b0726eeb523a71a119536867c +size 798118 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 778d0f1313..a8208ecf96 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1ce8a70c00d305a03eb3d898309b8325e796cd358bf455cdca9f6175365ce46 -size 715280 +oid sha256:f288796c21e4496ef840e3c37e9437cf219c35b4a33d824eac0fdd4cd91ac6d5 +size 716168 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index bd0b9ff264..726a4f786a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d5c09d74a1d9aa9945f6abd1e98b78c79cf4bb183489cc1a7ced66fb68ef458 -size 823894 +oid sha256:92fbac145d24a5194103ca55fffc1f87a561af121a1f6a2401ac0032b9e4f571 +size 823994 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 1d00a54eec..5a901aba21 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1a26b407b14684547f5805b70f21f87740b5ca5eb0f8b6813cd081ed305c88cc -size 738194 +oid sha256:a322d4646438fe67934ffe98fa0917aa0aca8e2311073598c9a6cfbf65c8d044 +size 738294 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 75eb0f47ff..304176e8a6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2581d36c1c33b98df048fc9f0c1ecea8cba82690664215b3f2d2a1a066841a1c -size 790866 +oid sha256:21d6c6d3a27e965158d5a6065f0e3b5adf665160a8062005db44cafea364bc01 +size 790966 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index e229344ad2..5ebb0d0992 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fbaae3d2b55e5998b115e6e03fcdd477f300921619a66d50f6d26693507a5dfd -size 708126 +oid sha256:796926093a1c0797da67372d9c65aa12312554f5f3f1c7ad473f8ad65148e7ee +size 709014 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index d8ce0f76f0..c18010321d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:af203815b38544ade11da19024efe5dd2e8278904d08f2bd2d8dbbeee146ac69 -size 816742 +oid sha256:1d54ca0908a29e669751084192efc8458a4409933848730baaddac0286ad6ded +size 816890 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index ba6fd50ba3..5ed0d873b9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:86e2219617cb7a516f9ebf9954c8998ecc0b52c420233e25e0308308292c4f64 -size 731042 +oid sha256:cee1b45cec3ed850efa4f8ccf9f76e0b814b74d72581e7a11217a165a3b1caa5 +size 731140 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 1dabe0f11f..de3b13d752 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a62244efcd1edc915c0871348ab901c3e70bece598629d95c55f4a9aae7d76de -size 641550 +oid sha256:de7e8a3430ad3ba0d38087f00e8d163634475cdf8b7b38fa321181fa73d0fab0 +size 641648 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 2342e91385..46992e892f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa4f943822181d29052098680578ca9f0fa17b7d6a9e0323b3e91fe36b4b79a7 -size 550965 +oid sha256:f119839edeff6dfb90f411fab59a1195c0f8f09e1819b958cbf75423038c051a +size 551063 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 5e9ce2ce84..32503891a4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1d3747ff840b511c6377481eece45c436ecf0168d412a54b94ebc8b9dea1dbe5 -size 641350 +oid sha256:3b15a9d4d1de697f31430827df65f3fc499ef29ecb0fe2f4e464aabc9538c45f +size 641450 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index efe7e85269..34f8876066 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:725173a69c4647d0eafa2a9b3f3ee679f341f24f498f7ebe386f6b4b6728efb1 -size 553479 +oid sha256:885010870417a1e30557d34d7d43ca7b9877e9fa6e005439cfde90190b1760c5 +size 554367 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 37f89ad322..b360f2c2dd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:650d94c4a386c63277a32c7ba43409221acf542abefb92c8f2f7041041c9ed33 -size 705616 +oid sha256:50bf7a33e8ccbe627850d0a5811bf72e1cb0a4b886abd516172ee6181fbf3f42 +size 705716 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index f7cf9734eb..1957afba1a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:242e0183ddc752892568865c69d8726856ab80bf0071334f21c63545b5db5e60 -size 617647 +oid sha256:030121627c138918356c872a03f2d95e7740dc7c7d5225ad6f54a6030ec370c3 +size 617745 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 462d1fcf5a..22b563cc37 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:943b0343bee6d94fe0ebb9fe6c0b6bbfaad2eb34fd5e119a93b198297707b306 -size 729070 +oid sha256:094d2d650539000f37d9ae2ddc86818f943d9490ee0acd0b31fbcb63c5fb84d9 +size 729168 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index ce92c2a347..5aafad779d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3231bcc85edcf5d9464c50fd80b369a9c1aff513bbc70204a2b67027d8616e75 -size 639670 +oid sha256:ba87726360de947241e4cb01d41e659615de3b93d922005eb7c9706300b43d61 +size 639768 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 02c6672bcb..8f66570290 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5d297b16d5a3efa2d78a6f60d3a1553d8663a468f30d9fbd2ce7452f292047ae -size 715552 +oid sha256:e5d0b8718bde852c3b2624a0b4b182e9eecb3903cd568d2c86d27b3ddc5b7868 +size 715650 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 283e35c0d4..f18d67b074 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4cddbfccd18a39ab064502305632b816e0b893bdb5ecc08831513d486f9310a6 -size 626152 +oid sha256:4f5180e3e571a60ed12a42eefeaf9794122435d53c6d5a2eb0cc98539d8c7482 +size 626250 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 47790263f7..a45430eac8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1867ec52f3f2f901d27c5bf1ce2b3fa6b4955203c24118720054ec98197b6089 -size 729660 +oid sha256:b8a478e0f9eeac0ad25a9f62a4ef2069ca8e32a0c6d917d15e371115b5a5a73f +size 729758 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 19593eb9c6..a305bd104f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e29fdbd4012bdf8505d50690138acfcbae6fcb551a123d41790c026927ad6768 -size 643564 +oid sha256:387d6ecfef4f4adabdcd13ab747e2af746c5b8b1e73f30b71bb3cb150ce73824 +size 643664 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index ca2c3eb4db..907e517970 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0c27f12e504d6c09f4a8330a4aef612d83ee4af0d9459449f643c658e4eeca6a -size 716192 +oid sha256:96c9db11fe4fd453486905dc21cf2555096ca2be1387da199e7cc697a9696ce9 +size 716290 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index ab632417ec..f5851c6121 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bdf5425a07ba8ea11bfc0501869ec7b18ab31daa16f8fb5f2c91226a96a4c9dc -size 630836 +oid sha256:aa41bc0b9a1a26d10edba218cee43e8d7489253a729777ab6d458a46474b3c3b +size 630936 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index b653cb0d0c..fb891a6129 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bc9d6bf853f0582342497e2a1b54d1dfde7230979acbabd94a8587aa10a3d32c -size 801226 +oid sha256:9c2a0f81fb97c9ab511feb1cdccde1104dd72ce12d119b142c245699a04304cd +size 801326 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index f2eff4be5c..7bc7662d4c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2fd3228e7236c0be3e85e4f8ddf94199bc25f67cfcbfe2acb6222a7f2494bdd6 -size 708324 +oid sha256:493fb54a8bcb780fbdba468e5842c4bc4ea4c1c67b011b371ad52cce693b51a4 +size 708422 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 5068779ae3..491859f275 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01b333ba240bf7dc783cbfa9bcf35388cca9e4f3d0322e5400e99a169383aabe -size 787758 +oid sha256:7a0d806f288976f484437bf06b9da4345be51d144e902dcf65a6bc85107f8eb4 +size 787858 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 85eb0bdd9a..c57b94dd8d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:13db1018afe21fe6e21b950feb9730f5396328afc5824a8f7fe52a6311e55ee8 -size 694856 +oid sha256:d88acb4ed6d65541861713d5b1f676875f29fbd8c0316f4cbb1330abc67e153a +size 694954 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 5b901d8ec2..35d9034da0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee36b4726c2f7653a8ab312427d811f59b47371b2c1fa878086de185e1d4f145 -size 606173 +oid sha256:b8d406e183b3a6521abcfe5f4f506de2a24c435abb55cafbeebc9cadd5fba41a +size 607061 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index de0a898c28..d4a667a37d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:22e8e5c797952114fdb3824a424f3353b16c692f4351451f4d31ccf84a49dad4 -size 523581 +oid sha256:8c4a40347067d354531938fd2fe1b35a5f9cb0b821f7370b841fc1e777becf64 +size 524469 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index b681816079..257095c7a1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:af831e44cbd7d1f56473206f975670ccc2c1101c17c829148d8278ce39b589de -size 631260 +oid sha256:03e66aff8cddb45ba780d16d5117f62db65ceb50ed694b2bfbb6d34fd89ecb84 +size 632148 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 7c71cab465..310928de77 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:109cecf70cddf981226876b732f9d3a79b27ed6099bf18176b41ffbdf5cd1cbb -size 546643 +oid sha256:b3f934c98ebd80efaa5fd47887c7ffda88ab727d07508f1ab7580a51ede1f867 +size 546743 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 3e4abe90d4..578a590f6a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a09acfc5b71919db2c7db16f81ba9a8f333f5eaafc94d1f4f8679c2ac0678857 -size 611845 +oid sha256:94f8a94f192a299baeb907b618d12f893cc0f1ee2a626e7fb48d9919b5908580 +size 611943 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 03f7945671..d317768707 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:76a0fc54cc0d50269a4a26adf312f5a4ac55d70490e7d57fd38ff080ea14ec8b -size 530387 +oid sha256:0dbf4810a9f5fd43151d43a930d7a536be779d204cf26dcaaa575c3edac1f483 +size 531275 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 1f648c45f1..62a716b6b2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c0e605454207ccac9b89649a48418c1c8add5c6b03b65cc069201c1a3ba889ae -size 637720 +oid sha256:a93b231a4e7bb51ac9226f0e0f5b8b759609e9cba03ce2aa3e6fea2be2d615fb +size 637818 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index dd9dea0dbe..b466c1de6d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5074afec3e9bb0f5da4169164b95cc45f5c43cbcddc8a9a1c156cda43ef80c8c -size 553301 +oid sha256:d6a7555b845473f0c52267803da667ab06d2c9591e927d7816fc282125c91e02 +size 553401 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index c0cc77656c..dacb148e24 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89b01760cb2b39ae5494a33558ac5f9bba8815ce85a642b10da00db94301172f -size 674434 +oid sha256:3ed9f0b15d0b595b98c6b1a5a7d9036de67bdaf129c5a6cda7f5f20b765d362c +size 675322 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 9a455c460a..b3a3e18ca8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:336b111570519f8e3f4575fa08b50f0fcb64c6b9873ed04c3c682b8ea751f1a1 -size 593123 +oid sha256:1d316c8c271335399275f96acc2e96dd824e5d728fd807fada534bbf6e321a62 +size 593271 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 002abea21a..34a1181664 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9e4109c33e50a8745c435754c31e6a1ea80b561067ca288abe2babba60d7cb27 -size 699272 +oid sha256:8990f099506f84bb5a105b75467818db74fc751a542f99756330a231cca687b9 +size 699372 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 166d518bee..41f7c78a3e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:688c01de7396f568361df52bd89b79691bcf2943ee090c671248e2248bfc20ce -size 618950 +oid sha256:dacb297d6007f44a92ebdf8d431366e23764ea27eda9855831e8a336f9c44c48 +size 619048 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 19610368a7..f8ed3d15af 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2568349018c235134e306c7a710ff3ba0a9948ef3818bb658e2ba2d59396857e -size 692706 +oid sha256:4251991a9ead3fc3566f369f6282a45ba94ee80e7913ba35e4b54e091be731a5 +size 692806 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index d50ca63d34..1b40752952 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:409d111c3626c5706c5c47aeb25796d69840182ebf75d33fa7e4d780378c8206 -size 607351 +oid sha256:0cbbc7100dd3947525d16a79208c1090a52cb75179f05f953f32e9726816c6d3 +size 607449 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 436649101c..d00ca9fee0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:675a88042acc0c63db17161fe4c940e880a80dd99effde791a5cadeafb81e065 -size 717348 +oid sha256:a46d514dab53e5ee2965505eac7e6298aa2944925d7409b5b0b6ab893606475f +size 717446 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index fec389863c..c07ee0b035 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:035684f87b34f46d07c9750512bfbc13853eb8258b455a9572cd3cfba4c37a0a -size 629626 +oid sha256:7c1e2a32fea4c468d273e7fca146ac9d8feb2aed094f3e1810554bca2962e632 +size 630514 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index b1d93dfc7f..8556f7961c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a05afc0484905425b89e45f0eecff096e2c2e0a2a3f816ae70df6c77c3d896e6 -size 685554 +oid sha256:4446035ced1e38f9e078186461784f5e72732e7cfdd19724d60f36728f637e07 +size 685652 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 3cfdcc155a..b2bca080dc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a60c0bd9412651b5c0f5bd63a2276b7a6e3cc5bac68d3bdc26028b176084839e -size 600197 +oid sha256:6554dfe604f85c1bb4e1a960e70da574bff966b97c012445ad8d33f345abc461 +size 600297 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 0ee6358fff..250d75fc2a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68b4f79fb1b465fe9f07c816e19cceb743c75f583225e3aef15bf48fdcb1d451 -size 710194 +oid sha256:c14167b4260e2f334e11cba113622c9557f2b39f462e38d7bdc3e0071c7939dd +size 710294 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index fb141e3557..7fe38003c5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a263666ced9e142cbe02342edc051fd132f3798678a6303ac8ac7c154598298 -size 622472 +oid sha256:f5d37f68c53620fe61ba5889fb6646472d90a4fc793af9d823c8cd912d458ef6 +size 623360 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 78597b037e..8e01c5b46d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e7aaa378ec4dbd2a2ec1d1f2c5f8d9fa1c71fc1194739108d80ad68ff4218d78 -size 695172 +oid sha256:76d76c012fe003b08ec6e41cd7de5db9e4584a3b2788a1211f30f2f0725181c3 +size 696060 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index adf7a3d760..b9d3950892 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:976d9a389d7658335eb83f15517fe39ce4925204fcbf5b9a9616c3369f13b70c -size 611197 +oid sha256:ce3a8c3b06dba2e57630423ffa0c4392dc8866dda031ba95382a2d0e3f439794 +size 611295 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 0f0bf85c27..c03a2522b4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25f6a7b13e917604991674bbe4438f32ae1bbe07ef6682d3e145bcc79598678d -size 720898 +oid sha256:a497639a2296f4ab5a625ae570270626be5e395f3e0641d185648596c9f39c04 +size 721786 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index f05e16c268..1cadbd4b14 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1ba1d2ecf62aa41020457de23279f1d762009bc3c482231d5b12c67dfba0d2f -size 634310 +oid sha256:6fea8f5c5e23743b510864942a97cedd35155d504c334bd50638dd3bc2e80a7e +size 635198 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index d72723e35d..a1bec9ef4e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fb87e0865cdbf418fe50ba72bce477c9f05d83ffcd7b6adfeb74439903809826 -size 688018 +oid sha256:cb88b3b5cf3a6605d6074f74ffa2b36f4d196feddf36e9d36bd80f4b51289fa2 +size 688906 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 60a8a0832d..2ac9315496 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:befc3f28c6fa7185abd606230c99d023bbe6bf9107ec5ca4abaa135405dec827 -size 604043 +oid sha256:b44e7b1a8d7b507fc91186732f6153bcaae982cecc003b17b97d273f0b17312a +size 604143 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index e318576e12..16938ce5ea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d25e83ce6a398ed10c33be75be9bd91e19e20b04d153f9eb83a903f8555b9031 -size 713744 +oid sha256:a5f87c748048a1a692a2717e9abe846afb1dbb141e14a0e13aad5f11c948cd96 +size 714682 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 54981b783d..e702bcc9a6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0b01c868789c80741ca596fbbfdea69b0a82c277d59c2bd2a622c4d6116382b4 -size 627156 +oid sha256:2c661d25828c88b4b9f9ae669ae0db2f20ab8128392b4f1c6122b21580115831 +size 628044 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 4577cc425a..d206de657d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f4527a699b1de9935f90b9bb25ffc39d943910051c5223bbc083b1e0778989d -size 763286 +oid sha256:dd0c6602c26673f0c98f9d12db3bcae4d069464765643caa6089ccfb99fafb68 +size 763384 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 265c19fe15..c247318a43 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd586e4b073a521045f3455f81256a2c035e319a2a3ae33a7c4a5ac047e69807 -size 676450 +oid sha256:cac1eaea1a80dc040a1398c0742c43871689b3b0ac990cc3d49dc6f283e78015 +size 676550 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index b1b36e4739..f58723fcf5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c8fda3e0377a32f27cbc236c40b40c397655114f8bd6126838a9cc49f00e6fe1 -size 787582 +oid sha256:8428312d1c5334105ba3042827e8e8b6aa984d1dd428dee90333db2a75c67f3a +size 787680 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index eddc85d0ce..adef40d3f7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a546105f9eee34d4d270fc83baaad0dcb09d495043dafcec9538e3ca01cfb26d -size 700994 +oid sha256:e1fa32f4be27145660f69c26b09d3caa105fd4287ffbcb1e79b5c285e5f8e035 +size 701092 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index b01f260e92..f666afff40 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd21c16a344dff8e7d5cda3736af79729ece82a20d7c12ba3e44b7c488315e99 -size 756132 +oid sha256:ecf431668e813efa43c95d15e2b9c509fa5be06df44a17ac263096298516336b +size 756230 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 4f477caeba..946626e278 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a08cfdd5fb883f7c47203e4e8a7077c79e8f197519a11fcb4f0ef7bdb5e8228d -size 669298 +oid sha256:8e5712a8b094e73841d64b7d667bc17fe93a9dfd01ee3d74e54034f93547d3f9 +size 669396 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index f9326ecd49..5b81da3b51 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db93504791269ed97c8de02c8c0645ec2f8ae6f42b2cd5d6cf9f4d1c86bb723b -size 780428 +oid sha256:0a8c3eb1389e21b19f2ded7e21c59780d1dd747d5c5bafb12cf398a807aa1d0c +size 780526 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index aaf64f5743..7733da2f40 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d6a41e8838eebdbcb949c059c80eae9fd04decf8a133d7ed51e5fccbe14a06f7 -size 693840 +oid sha256:2fd72c90be76ae42931765d22aa62b6dd2cc104aead9988c2f52a114efc68956 +size 693938 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 85652013da..2c97d95e57 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3d78e5eb0c81e5087f9c67b15afc0b4d77db3a4dfbdd006ef02dec626b44065e -size 620928 +oid sha256:c9ba8186a8fb4002a18591ccbfd52eca328a2385c2240d9262e334dca1fa96eb +size 621028 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index aaadc0dd69..63012b8cf3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a2f781770a0dd613905756513a10fd3b8e44adbcd6c1d17062526620df58a2fb -size 540703 +oid sha256:486c8aede7a38826595e896509b8b6219ccdb54f999f4e6661b9f70a637401ff +size 540803 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index d2d6dadef3..bf70b43127 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cc7d8c1da7b8e2588ac2f82c667356d31ed7d3aad8d9e8466b7cbb633040b027 -size 645964 +oid sha256:d89e7b2c38a34554902041d2f5ffb7c7edfe227ec5282a48f35b4f1b8c0ca20b +size 646064 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index e03cf1e2d9..9b9f521106 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e31fd3953a4e9ca34a73d8b6279533a9842def24083a7f878ff2e4ff843bfa32 -size 561695 +oid sha256:9a7fd0784309762f8805bc927c3ba9c8eb5241be7a637572cc7810dbe911e9fe +size 562583 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index cda9bd7123..daadf65a36 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:06ab64d666a441f718141e319d9e5e14db13278f8e131335528638d7c5658d27 -size 626600 +oid sha256:6fa0d723bdc9ca9badf1af5537cdd1cfef92cb389fc8bb69876cc83316035d5e +size 626698 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 7f07d6f16e..a17efc6ee4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:528c944fc0bed194e66ee3205f952fe9af8c995a790cd6e27ce2a76fde6b62cc -size 547509 +oid sha256:92db0b155b61265da1cb804b05218dd11d956057eba4aede07d6e5eb97480f13 +size 547609 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 22e1f382bb..1d52134364 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eae35047bff5cd85ee17c4aba55f618a0f32d239acd2737e57a869cef7435ca4 -size 652426 +oid sha256:e82705653b178697791c54dfa28ba480ab881376cfe5730b70b64bf80b1b9ee8 +size 652524 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 09328c17f9..38d5d0258e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d8fb7612e4712475bbfae980929e2babe709e1e527667600297a6c6e0345f64 -size 570425 +oid sha256:7e0855e76f81f04cab70ce216f80996df33a811a132f3e9d7b9b78fb95e19811 +size 570523 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 87e8f5257a..bd577b0f71 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:93aca45cdbc4962af3319a1263f3503f901edd2a019247baa48fec4df5f79e01 -size 688350 +oid sha256:e3b88ed01ca746c9ecb37194f1ed2cf378e48e5d0ad948ef310f2c67ffdaddcf +size 688448 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index ed086b3986..d203da29c5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e8d2cd082a6ef10e828eb6346f331a731d36f7132e88a4abc113267774fd81e -size 608667 +oid sha256:c1b4f9e618687879eda44b3295e33c5f8de8c1ecb8ad93604e1088d448291381 +size 608767 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 1bada1e3cb..74e514abc3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:05b5670b4f42622b053e5f8a8984c5284ce4596316efb6f08c5aba8cf0c79549 -size 713188 +oid sha256:8a85bf65ac04b73471b846bdffb9c5ac76f47a4728244e4a5e06f90edd525501 +size 713336 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 1a7a5ef338..31ed5fb469 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0297221ff6ed9acc7b965a329932f876ca5c31139b3a71a403d4a2e9bfb689a3 -size 633952 +oid sha256:843872cdeec4fa14eea2e53f81ec15b0aa0e24b718d544ef2944011ae53d9b1e +size 634050 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index f8442ce59c..48e1a813f3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:491f948b3242f76fac489d3c90c4c4490b3ff66ca43a0b2220ccfad8e1a3a41a -size 707412 +oid sha256:54876ce83819bd862e2c4196db34c00cbd4e75b19238eb8184e5da3ace554b80 +size 707510 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index e7780187b5..777843a231 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4d2cda759dc766aabc0dadcf2bedf25cca0d68e4dbf0eac656df8d6f4c3b7a3f -size 627682 +oid sha256:dc220134cd1d9d32cd8ddea325c891b0186d4ec1c6f4499561f86ab1c10fa4a3 +size 628570 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index adf41b60ad..b93ea3e7da 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d306094f3d021097b7c011ecbc01d4c2f55819633910925c1d9c518bc21f9bc -size 732054 +oid sha256:a1ed327f23584c6910987672a35184f214377e4e1df198c9285472f4975a7f2e +size 732152 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 32d8ae58b8..98595dd347 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:81fe436092b4852961a5ada0f790b9879e039bcbb96f0572c1d2890c9d338e99 -size 645120 +oid sha256:526a73d560f9a4b47cc3b20c5dd61f75b1c60535bfc51853da18bc0031d350d9 +size 645268 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 1a82f065e1..a8125c662a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a921e834d131da08e3b55f5119b9fc084a9a8256bfd2868f36bd1206af38cbe -size 700258 +oid sha256:b1605f5724a8d42561bfc05570e3569329cee0ac7abd96f3a64739b6fd2cd0e3 +size 700358 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index d5db7c629c..ad83acd786 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c3c33a25da9ed71661a06599acd9c77d1b7c43652daddbcc5e696900e74ab736 -size 620528 +oid sha256:821ee529c926cd1e0c8abc97c82083c447bbd488fc96cbcf1e9c8065cef7473f +size 621416 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 289cd58876..2b2ed0a875 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e050551efe23f96a8ee5cd624d474318ae95bf8052f6e2fd748542137a4734e3 -size 724900 +oid sha256:78912354a5472a2f9eb0bc49521673af7d45dfa0a57802bfeda2261548ec1e39 +size 724998 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 0c5c299c76..b091f110bf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:acc97d488a5f22afc522a55cb7d00f87b077a8ffc402695241e6928b39484dd8 -size 638016 +oid sha256:f03eb854ee05f27d593455632fec78723be0e9b3a2f5167f75b64ce17f327ed3 +size 638114 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 8c8227d145..2646677450 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c2cb9f27831f21dfd52d38e407291c1f9727a9c0ab335ee8152fc1979b4245d8 -size 710666 +oid sha256:ffc4fda9de6b561e91f41018e755a907527e2d3d4cbc536298a8328cd77389ea +size 710764 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index b07c2d27d7..11a740832a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e8ae3f0a5d0de7919c984daeb2f33aec1891bfab85fa04a246987f94888c5e66 -size 632120 +oid sha256:ea4e012ef9f45a26590d97b27d5aa97f8e8f61dcfaf069ec44e7efc3f5fd1c20 +size 633008 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index f14e0ad8fa..5eaad3b4f7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6d12df2b0a548b2a9967e73fd7acb5a6b32ea505acbece54241214c254230a0d -size 735652 +oid sha256:60ea1d580db54cc236caa93c01cd7978b6f59e37bde975c788494b354d3197fd +size 736540 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 1eee241766..7633d3e9e3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:280a72151ee552787cb3c7a96991ce477b65bcba7cef4ccce67c6bf82023ffdd -size 650644 +oid sha256:f7332111c4b43c3592372eca5e0ef52671b3d62627f4f94b8c7bc5ea93d7c69e +size 651532 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index b92cc67554..c7d5d1d945 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d51d017c8f612bcca038826787226978ff72fdd1524ec3fc22c902723de30ca0 -size 703512 +oid sha256:b0774ee825cb86f428727b2ab7894da928ba1d84200d9ae6011813dcbf08f19b +size 703612 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 185368f7d5..8d414c5a5e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:102138c8165aaf05019878719279bfb24586e906cbda3e4b07bddd71e6261cd2 -size 624966 +oid sha256:5d033c5ac9698e5d2b01de4ce0ead34ca538cd0d6f82cc6a687db099cb570ae7 +size 625854 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index dfd40332e6..035f0bca45 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f7a09de85ef70cbbae110959104a0a03c72254492a25e9c54ee8b39fbf3adc5 -size 728500 +oid sha256:fbf092568110c44e2c5f31c494489af0e71b30cdf8c3e50b21653ed1407aed3d +size 729388 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 216fd3c2c6..9840be6378 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0c40dcc6ba0b0f1bed85b0bdf45096b53c4c4120407804775c2573687d6ecbe9 -size 643490 +oid sha256:c9b5a00d1f4dd19a684580e59fa665e8528623ca85f8e26abddf02ff3200fa87 +size 644378 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 1e3629e9fd..a812e541da 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4f94398c83872fa0edbc4a54bd8ae160213b03eae0a20d56f9fc07113651d31f -size 776412 +oid sha256:4615025f483c7d7dbcb7543ffa4e4195bb5cf398116d11d5478a7522f3f8c96b +size 776510 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 5ca28a790d..8bbe4796af 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b52a1f0558d164466542c6ab8dad6c3303f6ba5f374e5f197a1fb09588480703 -size 692636 +oid sha256:58cee0796b2861ec1f0b5bf67a5213aa5cd0683097aeb3ca32696fe2839b43b9 +size 693574 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index fb19210fe8..26fc597ada 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:59a426062bbd2f80e113a6cbaeb15218283be0eef51af452e7b1b985dabe2909 -size 802336 +oid sha256:c4c5e632e6c2fcfd605a31554ef358cc5d84c292e208ef0c8ef55a5b75b4d5d9 +size 802434 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 097b2eb01c..1dca2df925 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f5ba49c213148606f8320b86ec1197f58783b88a7bee2da3454b57067978a08 -size 717574 +oid sha256:0f9bfdca2099b40944e59e13518ce48cf756ac4d432fa102b3d6eb2f71de024f +size 718462 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 43a7645d9a..e658d7bcfc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:024f9102cb85f444de866096c94730e4837b1f3e3f0f2d7a635255b83df748f5 -size 769308 +oid sha256:323f2f399c6b563829edd8de50f09b7cf9b86302c24e60229ee31a7041104009 +size 769406 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index a2309b1009..579b829e65 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:26b5e62c9be8c398f0357af32e4beed8ada6dd526326c5ec01d51478782eed3d -size 685482 +oid sha256:145d3968244449333fde14dcc98aa4ab02d6be6dbeca1a86f6767cfc71ab411f +size 685582 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index c3a4fa2ced..eb379b0d15 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:912774fc420ecc1c1f36d40631c6033b0d5860070499434ac50036998a758be0 -size 795182 +oid sha256:58b02a7096fe6b944c428255cbbba0de69a8ddd7edad602defb1fe494b89b91d +size 795282 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 2dc8195ed9..3c06dcf3e7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f23b96735680820bf250b0e291e5546868bb630356909d925d9d599ec91b263f -size 710420 +oid sha256:3cae01903a98e3d4889ff8a5254adf9696f99122b7a6d0bf3146151086528972 +size 711308 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 39d34da719..432bba3b22 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed3d7ac0a42f2363ecc5106765e2d70cbacdad700df22ff10fd52dcd71b2f9b3 -size 618364 +oid sha256:13acc13bd486d9800b9d7cdbfac32573cf1311bcb7918028edd1fbc9c1331717 +size 619252 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 91314f1219..ebec446640 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c9d0e2f6ec65ffed6ae04c4622365a69c10f8443d97074ee438c0d82e2d48c1 -size 530935 +oid sha256:59740db81bc83224c962390db76849e803bfe34fb3bfefb0cda7a4722a350726 +size 531035 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 90329f16a1..fe3aaf2c88 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:60e86d738fec9950c804c793093842979a7238cd0c2ba6d731e3592cc5242274 -size 618164 +oid sha256:200fbc715581d5cbc4de80c7731175402a4b139cd1e621db27f1ce1100548d56 +size 619052 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index c73a39e073..b4d335096b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:faf449e7ad8a71e298cc50fdb3ec1a6439a191d07d9a4ea9c5fa02bd889fb1e4 -size 534189 +oid sha256:9b030f827067f687391b2d00cc0006504aa1840f9552e9b20ee79d12e0251380 +size 534289 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 4f4c6c7711..6b906bcf60 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c4b26885e457589145468eff851fae3a2234bf19260d815e52dcf80429f00274 -size 683220 +oid sha256:c7a5d2ffa311c6aa4f0e9aa814c6bdfb977e4f8bc88000c2e328b9f4a2310487 +size 683318 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 0efea9233d..6c63df2354 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:93976e663ac4475376b7baf320aff7454f96cf2cd99b7a2998c1d36c6beb4b65 -size 597617 +oid sha256:3475bc18eabe33a66b41afbfc10a623966319e7eda6f5db6f2b687cdc34d061a +size 597715 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 69594d6679..f0e8a3cc70 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c4201d525cacecda3e026c334dbbc3c9f2d8a6bb70fdf37bf59e60d036eaf465 -size 706622 +oid sha256:28cce2903fcd32cf4ec25219613f42522f712046de12a5512ff8e0c11eb28017 +size 706770 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index be315279e9..16310d2fe7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee6ded9473d833d79c7456be21c7460bdfd88a3e48609622cad83fea89b63b03 -size 620478 +oid sha256:6264292f38bf55036bad010632ecd0241f1665481cfbdd5fe098d6478bff937b +size 620578 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 5c935386b5..a0d50660ac 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7463adaa09dde8b7e50ee354dd9065d55fd882a5b5771dabaa5be0cf958df18a -size 693154 +oid sha256:e4ae5799dab664249aa9a01bf6220b723fb511634b0dd218a14299641855e06f +size 693254 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 9e14d8b38f..eb359e296b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:985dccc0c623a2ccf6fcc5d6dca7258a177b37257c5513a1f87405e0e25ebad6 -size 606961 +oid sha256:a35120b63b38cc501f1223dbb03e1d6090455362c6027bd589eac5eb0f2e3b79 +size 607059 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 2e0b90c9f0..ba43852a35 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f99462a41915ae0b4bf77a39a9a01b620aa688be5a1d1882c1d26e6e1abc62ed -size 707262 +oid sha256:ee56a5adf7799618a8f707f859817e86cbb882f7c9d3d0d23114d36e7ab7a670 +size 707360 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index ddc3c7bc54..6728db9592 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1b1cc8f3aaabfb8332b067bc139c333111b81f679dfcba26f40e893b931211f4 -size 624374 +oid sha256:27b96c4c9e460c5b796359b192053799c55dcf03c809091f9bd07b5188ae5580 +size 624472 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 110c9cd6c1..3518644e92 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:99e09286f0117839189c54226a8d5e7ab76e4a02997bc177e3a7769304dfe4e2 -size 693744 +oid sha256:1ed57e2689d5c9a81d53942802ba28b0c67878e4c96c79c60c070706977c0881 +size 693844 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index c737a10e0a..a640671166 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:71d53d5b17730f2e44c93d18a6d4edb1c6b150d686c3798393d29ca8ae7a23cb -size 610905 +oid sha256:a5a109138e60de52fd60985a573ca458789108e48a0ba8e63003930d81bc0606 +size 611003 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 9f4d4e50f8..4797d1573e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e8d4f2ec1a7ecf98c9c64b75fb7b65ea813bb30038bd16db3b318ca035e2b42a -size 778830 +oid sha256:1bf8cd3bc1105c4f74fd3a89f3fc37e93c46f9b628fb13db1912b127287ea32c +size 779718 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 59b5f8982a..af24e81700 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd83942cd1a780e5440f5f1a60a75e50cdde8428d19ad8c134f7cb89ad2bc813 -size 688394 +oid sha256:59640b6cc29ce3eba87bf8121eebddff47194869ae9ad7d8f099ac32b9bfeef3 +size 688492 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 53edd3a00e..d07b0ab162 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5d88651cddcdfc146e45186e21f1d5a153a2fb266b76aed582e82e6ffdadd427 -size 765312 +oid sha256:47cd95169eb46ecd023274fcc7fd1b4f7fd4ffa81579e976a4cdf07fc636f4aa +size 765410 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 80f0256201..c97ffe8cf8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d6a35e872494b5b42a9cb201d9fbeeaaaf867026443d5955a4b737c3cbc007d5 -size 674876 +oid sha256:830b1952d671e57a2f30951f8e392ebcab745048dadfac3fb4e6bd8022d3c02f +size 674974 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 1bba2e078b..f2035f701a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e03af489f82274824702f12068884c2457225be1683615774cb34338d6eef4da -size 615151 +oid sha256:aabfa7e76efa46f44f27b5932290bb0b148ad6cb45286992a7361391bd5c2e1f +size 615251 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index d37ea9f640..429ee94830 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6bc0f9ac82ec736498923ca989f0d4000932ffc8e73f157031be0e89d595a425 -size 532511 +oid sha256:5d39e2383397a41ef7b0fb1847735b3ff75239eefd8b67880e22bdedc8f9ddfe +size 532609 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 9cbb320afa..0d52176832 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a2a68d41773291264dff5afd86895061516d0f55f9bc85416730439883601e9f -size 640188 +oid sha256:1bb4d60a1e75b5a3207ac3ceb6db8feb2e50fdfdb9d5a3a0b1651f9e2052aa16 +size 640288 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 3f0ac89f49..202153173c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b7e176f343c0144ecbe9514e1705ffbf2887400cdb412e41491ab07554cd4b2b -size 554783 +oid sha256:280e7d60bd6572457a9fd4b5432a81cc8961db34eb96317ccfbd9e2e0868858e +size 555671 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index c2483c2842..622eab7a2d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8c22eb48787f458b117a990c609008ac5e198579530a3396ba39d90e5b98b271 -size 620774 +oid sha256:b7eedf47d3d2edc1ab2199a38d44eee4f015db3ee225fe45426fec34abab1f47 +size 620874 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 97e7075949..bd6b8a1e76 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bc4bbdf149de35db9e57194761eb3977c6104f6665f0b83220b949373648378b -size 539365 +oid sha256:a0f9d5d459aab9e4ffa10210d060dc32016190a74334108e6f1353a62c0d4035 +size 539465 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 444d03ca8e..50b1685935 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:92f091bb1533f96023870d12ba77c7f3d3ad122940cbf9851816b0641a0887f3 -size 645860 +oid sha256:16b51f4322ef4bee5a8eb3cc81727eeb083f398703e2ef303bb94cce77a726c5 +size 646748 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index ecfcde5388..1e9e0d5000 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8b63f67051acea95f1631d250ef1a5baf4ed9cca376958d1d36b66ed0a9e3c5e -size 561441 +oid sha256:fe3b2ace5ab588dd0fe48c9ebbd95474ad764a9113fbc8031b20cd489b8f4f5c +size 562329 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 27d617e8cd..86928647b7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b28a6fd86e7d00e10b27c7f02c156dc20883ae4da86ea915bfa2fc9bf38071a7 -size 683364 +oid sha256:d3c5dfca58be9fb9fe05a28282df88d957cbfe05e79306563c69f71d5a63b367 +size 683462 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index de41833c7c..84c8bb03aa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:40e632c0503ba098cffb6f28cd127f2760579b6d5b317bb66dc50fcec9554373 -size 601313 +oid sha256:d30c1686ed00c1db302f17a831f2a9527276f79604e3684acf7b587293a4a8c6 +size 601411 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 8910bee5a5..5e58ce2ca5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:34f5a9b97734337f0655d2a02571e6b05e1b06dc057b72c2a49dbff72aae4963 -size 707412 +oid sha256:2d3b4af244e2642fe3ea7e4a286665a746b92dbc6a0dd10553b75613addebce3 +size 708300 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index f84038b960..9b8d384693 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6d9059d6c33691598221fdae680d29cbbd99cb1a1994339b689c287cfaa11036 -size 627090 +oid sha256:4317c38b96eaa2bf9ad7a4122c72458305dcd81b9f405d17a965b67675a13758 +size 627188 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index f8f9ba4e3b..1d5dd18030 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0bc5a5b461b6ee6db021625ae992f4dc4bed1a23500207e3a9c296cda54e2f9b -size 701636 +oid sha256:9621fe9900c920c1e25cfe23256443af76872dd7290b6a0efc38e4494b0e589c +size 701734 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 77ba8a994f..f704d1a63b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:45f814886e4958e04c31f20d7f361bf51a8e223600a9506173c7370584ec8d57 -size 615491 +oid sha256:6d958a687ddb63db4cd4d3a5d9a4417aebd8dc7e2a3c2499de13298c06079c54 +size 615589 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 764d962935..5de5252d0a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9c8b663433f57006f137a68f6ecd77b579be522433521ada5a6e754b0d6d7a42 -size 725488 +oid sha256:4d711d549a78ccd71c7253268da23e82ec3f12c798a426e318808bc5e1633134 +size 725586 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 8f838916d2..f421e2e745 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f6b8b5542cd4676244da7bea57ee4421afe21733c41c6ad7125b2edc2fb91033 -size 637766 +oid sha256:ba4549f49cedf371e04625e9055926a756f8034c07d4c31e5c1013a0efe8d6b6 +size 638654 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 9384c6f4a8..efc01f7ca5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:adf55604bccd668ea3cba99c70a75e1c073c89741e6f534a72722bd604bed627 -size 694482 +oid sha256:da621d2bf889d1170a80bd31e96462ae64fc5afdd499b1b8775d11c474a94195 +size 694582 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 00dc5d203f..c11f12a50b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4654c9f280133787456f9c1b6a43e91bfab1df86e3068c7f7f8b1c2901969d21 -size 608337 +oid sha256:310cb87af1add71317e89f8ae405be91798d5aa876a3f40a539b2e70c99bfcc2 +size 608437 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 0e54645921..43773caeaf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:649ef35dd2ae3f340eeb2281c18d4e7d3de186be450d58296d132e945cddfa18 -size 718334 +oid sha256:fb1e6b6d0170b1e38ba7b191ec1f04591e9ce275d50bb64ff130979debd42222 +size 718434 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 2088c9a2d8..e0598b9452 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0acf4153fdfa49397e974390d4d66c646d8a4c4ed4ad427df8ee5c08ffb53e66 -size 630612 +oid sha256:4b75e81e43708981e6cbbdb72f3346a7214fc930f1767b7ce54f6dcae246d654 +size 631500 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index d62a64346f..69d6f56660 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a4b04c67769a884c354aa41e71905b62d92add42f1543dc98e61eeb5dfe4cbe -size 704100 +oid sha256:f917aa023c229a361374f6b477950309945e4f295a77931d1331bff4ec562e69 +size 704200 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 12085cf426..2beae75436 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0572f6afa046082061f16f541618fb156819e181d9f3126fa95db12ac161c225 -size 619388 +oid sha256:96aeaef9dea80271885e73593ff52c99090c19360ed256f557758096246b0a21 +size 619486 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index dee99129ff..3e5dbca747 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e3f2e63bc0065bbfe36d48913c27feb91f889542d0391ba00645edb50f35b00e -size 729876 +oid sha256:d6fc48bfa7c828206c6ef807f1e2004756e491db0bbf0b2f4d23cbf5fcdd1c41 +size 729976 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 1d4c2f7d17..eea4176bab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:05274e99c68a935b263d079cebf1a2419d1df4a222b7f143cf9920ae06d1632c -size 643288 +oid sha256:60ebb27ef2b04e241fe49a31d484fb2e2a188ebb51ca6e96c14a04e918a943ca +size 643388 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index b0962f6158..e5c7cf9aea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ca97c196b27214ab95045301220e4e13ded7bdd752bb1c578d2ab169ce0edd7d -size 696948 +oid sha256:c8f7f213243a3a7352596376fe12c892aa613e4a6d36de5284e9f21d4e06e68b +size 697046 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index ce1e626024..ffef38061d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f21a519259bea202640e99e357047f2f6462bbaef50266032282ec2460825762 -size 612233 +oid sha256:4acba6dc5d30c00f8e77680756d4b36735e2d1c39d86569f61fd50b7213a012a +size 612331 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index ea5889e2ed..2fd272ca1f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:64ec1ff6aa359f323a1b6a3f21087a507be51c4a819f316223f099663afa807b -size 722724 +oid sha256:e8694cefb815d39c0bf28ac13a7ca7d5b3b7fef99293a0b8f536ebf4fe5656fe +size 722822 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 521bef25ff..be7c50394e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb7d1abc37733731fcd2e430d86b7dec4cdb33711f0271d2560fbfcbc6f84308 -size 636136 +oid sha256:baa94634e29ef5fb5c413113bf435bc28066f8fd2cbf1f8d2cf13fde007314a3 +size 636234 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 6ee015389a..0142ed79d1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c0fd64075f9b60cd1e9c2b6306b0bdf4e8f540a249daae3a6d0c7575723a848 -size 771426 +oid sha256:7fa475bc1683016c0570e1658a850209df20c460b5e78c0eb34ec4e5ba1ab4e5 +size 771524 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index f772968f23..0bb4af2206 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6f6756274b2aa7305e39a89f621080cf6a21237ee94d0fec69f6011709591e8e -size 686022 +oid sha256:7f7919bb6b1d9560087857e7f84526fde6e53e4286ec42516a3d62ae4308cb37 +size 686120 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 776a415109..84af81ce72 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:39903e3c4afa87dddbcc90466ff3aabdb2e78707eb19e7d7fc639143a8cccc74 -size 796510 +oid sha256:07f0a737ac84484d3bc97343c3fe4838078bd0eac32a1258c5836765956d7f5d +size 796610 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index b95c386be8..b84ab4f5ed 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:12b09287f3d6f31d13195db3de4e234287469fcdbb717f0cb213bc441f8f4d37 -size 708344 +oid sha256:f22bdefd8b643d96a516b7c9e1d3631cfc8343eb2153a1114c9330f06445d084 +size 709232 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index b5a40c08c0..0f6a5cb121 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ed657de9b7c3d64e440dc3c6263429ae78cfb6fb988a854b99f2ac65a1616c2 -size 764272 +oid sha256:19e2139c1962868111fcd18d4b564f3c1d1ff33aac85dc6accfa2ad90b07b835 +size 764370 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 41ba43a418..fd22deab61 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab40ae9e81ddcab2b3a44f94d8c86bd380ebf50e381433c6d0041d443fca22a9 -size 678818 +oid sha256:19cd503bef519b384786ca3e3bff2bab80a25960c7af2f3b4a204ebe0c223be4 +size 678918 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index b03c36e46c..e2757df004 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:128f3e4b0a22152406ee59e408cdd80a703ab51016364aef4f6bd8fe00ed49fd -size 789406 +oid sha256:e8788d96ca451c2b639aa5cf69db4271e11459f4dbb7187f22a8f2543995b0fc +size 789506 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index eff258523b..a5dfe82dd4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a4b7203fcaa86f94d8a4202a3d52811d35d0d78cb35a5b9bff887c78872615ed -size 701190 +oid sha256:a7dde076d0251c00b2216c6705e50437f54c1bc85f8de5337937015cff66e510 +size 702078 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 4fa92dfd76..fcc87ebe98 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c0f95798f789df0b643886bb70d6ad9236dd64a0e76b0bb5a4529e609560d193 -size 686542 +oid sha256:f6617b7bb59aedd558face9e1be5432bf24ac3d75890814d69c3a1f934a6a626 +size 686640 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 8b9e1d05c5..5b896f8790 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02b26098eb51b5d4775e8e17fa45de4d916ec6536c6cfb97992d504e958be16c -size 600347 +oid sha256:6dc97e11c90ca998655c1b213a7e0ca637d16c39d2b8c6d412005689a621e253 +size 601235 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index c2192deaa0..bfe27539e9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f110c3d935fb11455ab1150ec2d1901740fb4a25f2619bdc999688d89217b49 -size 712712 +oid sha256:3d79dc2f4d675c4052f36f4a9e287b3220a3cd430e923ef5c07e52adcc71c95a +size 712812 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index f258c6fcc8..74c0ed5df2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:34deae3ae0c772d691b1b060f85045606ff7e03112306a6ca2d22d1edba6b337 -size 625286 +oid sha256:fe0f81789e2299b2374c66eb302dd7e1f262122664446d1f7b971ba71ab85ea7 +size 626174 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index c4868f3ace..050fefeb39 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:69279cf867b71e3a88c7fe6c9f10a81974b0ab7e75ea8b87b1ef710ed1bfbe31 -size 690782 +oid sha256:a3f0affcc6e18a29b828cad645fae77a11a1f5516299940a504fdf9443a4796d +size 690882 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 2201935fdf..67a135529d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba81dd04c181817cdb411ef4e470e564bec5d426b128ec7f06327467aeaa8216 -size 606019 +oid sha256:f1cb92bacbbb5543c54f6643c804e80b44f5c526cbc40f07f94b3459a703c930 +size 606117 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index f43ec5ec14..41539f2d27 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:34683c808b9f16e866905c01aa3a0f4dc8bdfb0673a094baf395b44f8527979e -size 718482 +oid sha256:12a61670fddda2da5319ad8d4af7def2f83951f15db70c3a047936f3130540e8 +size 719370 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 3b757d67fd..64c3d5f97b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:06d4d244f2b4a3f1b213ed28007579343fc475bb6e8ca273ba122fc813bf57da -size 632438 +oid sha256:bb62c71c5b8eb391ccd09c451eef673c2a7011d5dc08bd7cfab04830a73d26a7 +size 632536 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index d9846f3d98..147beea3bc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f91243422868adbfa099092f58ddc630dc66cc5b3c6fe48b17dc5118e3928d6 -size 753568 +oid sha256:31a5000f0c2533af0e684e4bd4d2fa6d1cd1a1c8f4268cd24e9f78d61f6f647b +size 753668 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 245bc1c5eb..2fa084716a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2017d210f226e5e866a4ebefa1aff2f810b4a7af9fc43bff86019bdf43ce7aaf -size 669052 +oid sha256:12bcc8cd185c8e2c1375c26c1c4465f9212159807cbb79266038cad83b711f95 +size 669152 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index cf0e8d537e..2519df4b5d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1c886fa2b172dc5c551e021f5781be74fdab6e801af403579cc660c535cc4e21 -size 779936 +oid sha256:c83247b32b5d7e3ab8bb44eb890f256b1c9a7df27fb38a26e98c90a8ec840e53 +size 780036 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 98c0632978..e9ccba4064 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a151543bb2c46f9b83082cfe653905620a5c111f44ecd4b80775afd0e48e1aec -size 697838 +oid sha256:16104acbdc8cc6ab0a0b2cebbc978d83ccfdd5b3051b8a9ed661bd5d85f14a3e +size 697936 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 58940d05b3..be64078cd4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:596c0311d90b05de1d8a8dfe2f98d4b2d4fd39b6265d838c8a28b1dfdb4dc0f7 -size 786790 +oid sha256:d80ce6e3f0d18bcc149f71721b022c5cc2f63725c807b0667dd6df81ab8ba0a0 +size 787678 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 0fc2fcdded..8a0dffff2c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7e8f188d131ce2e054769e0bd8788ac6a4c62c807a8e17fe1954e8efa4e59d8b -size 694330 +oid sha256:321dcbe990554b7856aa232de30dbe5523a0bcb1022f03abebc1582b9b14ba00 +size 694430 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index a41c111264..8830dbbb79 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:705967686b98f93dfd049104f41c86ba84c818128cb5c29a6e4d6ef125dffaf7 -size 810148 +oid sha256:de0b1e969830978fb3d67220fc80ac12fb3afd9ab20e703dda44a882911d8fcf +size 810246 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 1610526334..62e0ad827d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d6525633a841755f4e6d0574256ff11a232014c0ce30dc0b561f0e44c624788 -size 719120 +oid sha256:45bb2af7cdf5068abe482380bfe464a0c8edfdea4912d5b8bc3e6e2ceb95922b +size 719218 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index bd5670aaae..14a99ce312 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:99bf629894f76e73b11fbc7eccfc2f6c189ca192abd9c8c5460af0c1f00a4bd4 -size 771200 +oid sha256:6f3881c424a969df65a7481dda0678a2862461348298d73dbb0a48ec7073d32e +size 771298 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 42b2a0ce61..edb437c148 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25972ea4c7a068c3de476f2834e59bb37e2ad2ec200c0b4e716134975bdb55a8 -size 676866 +oid sha256:eb4fd9a2bffa4906dadfadad34094080f39ea5bbca29ffa4d646e68aa0ad58ef +size 677754 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 8b788d7a8a..14b4251735 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43c36b49bde3abf324a9573b1c1a602d2568879e245d3bfd5f85754868cbbe72 -size 795102 +oid sha256:3386c6dca3a1dd4819f34d3b95047e38be5cfbeb35d1f4f5ec413a2127132414 +size 795990 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index dc35b42c10..d1ad98ce95 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f0e3b32aca38f9f26a46bcdab97910f79273444f5b3e02e0c070c8b5301c5fdb -size 704814 +oid sha256:82adfe6ad4b3d655c0cc5015a5168c9f65ce9ab0040fbbfe58897f123b6fdd0b +size 705702 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 034a43c86f..0c09e1ea96 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:131e24a92470f01df5f27f0b30108f3389e1ade7e7006d3264e47ee6cf5b0040 -size 789748 +oid sha256:cd89c367e08f7375a8dc4092c0039040d95b161dc25948835ecea2a8fca15b95 +size 789846 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index e5a55fd140..42b60920eb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db9b081c43928a6b2b52650cc6eda9fc0d069a44398ff15d829b886c133de642 -size 701186 +oid sha256:3b6f5eab9aca47896cf4f44cedd55f40fc2ef4bdf82d8f82706ad4d3a7a2267f +size 701284 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index a741209be4..a94d51aa9c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:206717376d850f2b1131f5d2a943f22581e666d70785a3f924df964eacfbdf2d -size 813056 +oid sha256:0c0d6d40cb96d7a84b3cb458cd36bebd9bb553926fd9cd601003abc539641d8f +size 813944 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 263241150f..fb479b1da5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba1cc1cf86f8add13b07a5c01d851bf56918e98130df52638760930ebc44e159 -size 723656 +oid sha256:e24c510d7411b28259da78420194ca587bbc64902d9d29f28426850e2efac763 +size 723756 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 241e040a15..df2a092ded 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b5513b39d8aeccca8897a7bccec5ef7f12709d50993658a0a6706cac85d8c0c0 -size 772184 +oid sha256:59f96d890460a561a7768e4d34b8bba09055b98ee9558089d7176c0c2db3d5f0 +size 772284 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 42100f2aec..79920ea439 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4da6060c29043f89d8a00d56e07c8bb54fb3113495dbb5183cbc459706a8f368 -size 683722 +oid sha256:8fc03f52288f51d9327f72cf68771c7fba16527b3fcd5997c2241d09c79d4ea7 +size 683820 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 4e6ee6c2da..39c62004e8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0536fe869ec490625c006209cb87453aee814aec21b0370a44948016cf33e31b -size 798750 +oid sha256:25f3fddfa44b62598f87455406b269b68e7ed05d7c6948506322ef92081672b2 +size 799638 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 946834801e..ec3483b277 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0f14638af1b7720abc586864ff861e2c91fd93cf71f877d4b44805d60af3161 -size 709350 +oid sha256:e8e5619e29a3cfd3ff06ea4335dd3604c6d203c7e2a112bfd362ca1c51d1b9ab +size 709448 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index c5e1cf8ee3..515cae3bac 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be1fba443d644d827941280ee3f0c14252900f6299c42b72c9378c6af86481fa -size 858206 +oid sha256:f0763e6fc6ee969d35562a9c24fa85b6223eabced32dc68b51bc1eb5daed4c74 +size 858306 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 511517d40b..ec21c9cb8f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cfe1957cb1a0ea6eb2578227b27575557d92f2022f03b67752b16fb46bad8e5b -size 767376 +oid sha256:943c76dc070bdd37cfc5e47f6a62bcdbe2a592608aca145df253d9513b4d1b89 +size 767474 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index b043733fa5..8dd9be021e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:72061ea9e0f4a297a52621b92cece7201ce4c246ef3664d02af658d23f8a85db -size 879740 +oid sha256:16ab58e997b3e019881d7b24dd75e382824e309a611564dad799378326b28660 +size 879838 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index dc48bdbfbb..64c57446d5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b4c872bc6960e7f1e8a0173f28436a90c816b17f7c37e1f26e594073a9f9525d -size 791080 +oid sha256:e002f3fbbade4012ca58cbd8d39b96e238027ca91c301d11248fef1a0c300556 +size 791178 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 0fbee66a84..4222470f9a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3d4456c71963672eb704be4e87b92dd3366897a195d8ea29a9127c080e6221ea -size 841926 +oid sha256:5ee1c34bc94ee757c4ce0d9debdcb705d6ea6620f5501c81b1cf8a577890baa7 +size 842026 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index e352f2f592..0194fc9523 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f85a54de2e366dcbc85f4c55aeaebd937e3e870e77c7d09afba5fc261b14ffbd -size 750800 +oid sha256:81c20cf6f09e3e0c63bc127547b82649688e61bde97515609f92954e9e5561f2 +size 750898 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index f87b544665..5a31c2078a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c6b6c17d295c3e5bc63afd29d46d358a8e6a7591f0dd7bbcf230ea5d80f23173 -size 865434 +oid sha256:510204c688f533be05b7c120e826f2f92521cfe22d2be6f7f070ec585ec7fd34 +size 865532 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index d602c05c41..c9fe1e6c8b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:51b062d4de76157e0ca075db8f1e5a2947532d2a3da3ba753a312f588caad95a -size 776822 +oid sha256:dcb3610379026923333748a493057c7564ba2945bf0308ea63272b8edead01bd +size 777710 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index bb77521dc2..51020c29e8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:63f16ba31a7271113ce3ea86f77bec436239e351c06d9d94dc1c9f188076ef4a -size 645348 +oid sha256:bb7e616801fdba6dd1417943a181b4acfab3553e33b058627b99c8bf9349e147 +size 646236 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 34d29644a0..865f77bea3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:31aed60cf00b44430960dde708de7d82e9db710c51c12b4b613c46c2a29b3d4e -size 548943 +oid sha256:fb615ae1ff4da46e19b3d9cdde963f8443f725a95c97a9cc6e137dc2c9684e4b +size 549041 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 883aa573d6..d84cced6f8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e1bb85bf41ecf531b01894fa5711b9f1534c0415a418ccad45c8f8a758274c5 -size 644558 +oid sha256:9b845b82bed05a34a1a689da416db6ffadca6304fa7ce17655cdc31eac2882fc +size 645446 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 0f6d76958b..0d90d732a2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a8e173a0bdd50a966239e12a79a0e8ceb8f551f0c3b3483be69d75cd42c13002 -size 563789 +oid sha256:f57263cbd0c8e9cb3fdef9760ead57284275d5a6aa4b79758bd71e9f854029ef +size 563889 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 001ea31185..d8af636831 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8c0f41304eeb6c7cd1bee6577d914846d7099e24695362e3b9fb1f8315406221 -size 714448 +oid sha256:0fabf229667f7af2e20206fe3423d78f9e40622bbc94b1b416fbba70fef5c31c +size 714546 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 1b1cb41dbe..4e33506a2c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9fa73b16048a2e6120c9de914bf9b17e7850e97a9f5d3a08dead498b0f6039c8 -size 617251 +oid sha256:fd5264354cf62b6492761a39fbc56aefa19eca1a96f6d93db27be764e2d1cbcb +size 617351 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 9b4026de02..c4a450a808 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:502e40b0aa4955a3eb9c54e52a384d30a7d26ea0837a6019cc58f28ce6d488d0 -size 762468 +oid sha256:5972fb05c87da0431d52a2c85c055d5ecc2ec995d5f1f76e3ebf089a6fdda59c +size 762566 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 8471a14753..c77ae3321b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f5aa598cb5e7d56cef7cad18df0c720238d988b576e28596ff838550950eaea0 -size 657528 +oid sha256:04d6716dfdcde0dff386d2bba04e3c87d17c0711f2721c5359fc4b66bfe61227 +size 657626 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index bb3616e85a..2c3dcb016f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e49010fc3c0fb541fa2bebcf5e3a4f233cf93e1c08d17ee47cc564c7cc96c39f -size 734694 +oid sha256:97d421b99c7a9504dd99f3853383bedbcdbceea92647f1d84b8d457d1ff6266a +size 734792 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index e1029529cf..adf31062dd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:42b419fc8eb903f3ef22eb51576742adef95a137b99e77b3886c84cab948b1b9 -size 630542 +oid sha256:9e9b4ea8682fae9b4218e290b1214d8001e5130cea362499c1cfa8ed54bac0ba +size 631430 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 64d45b2012..968da56ad0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e368ce7c47ceeea59956023852ccffe7afed1c7a1b16360d23c443357bc00e1 -size 756596 +oid sha256:fe9a54f3a07824874e1acb766ab62e84d9e26c6a39877f8422d2a9bfa9432642 +size 756694 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index e4c4bc5de8..b367801694 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ce22901a54dacc679b321c500f51c6772c55cd89d808686de5dd236b2c92226e -size 674596 +oid sha256:0b11fb16ef784afb68aea8dfc77f16c90effced825d6abb080868b5017ee91e0 +size 674694 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index e4f08fe59a..744775d479 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4164a898dcfdf40245660784ea43098bf5f8db7cff2709637fc99c862eeba397 -size 728820 +oid sha256:c880e3913b2883676f08ebabc411b6c46b12919900284fb6d28126baaac10028 +size 728920 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 6d5d524cf9..da888bcb68 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:20ec2375b5d6469f9d23def9b97ab50628b2cb6e94a63833ea424c4201eeed5c -size 646820 +oid sha256:53f9f68458eebdcfb63709f4b467a58852a80917264795eb93a6c8a8b270ebad +size 646920 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index c5dceffbfc..963b63d41c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f5bd4554f6f7f55d027bcaf567ae18bfc8eb55c5fc643b6721f19a65786156f -size 827670 +oid sha256:22ce5a3dc061bef1edba808c2c4f561f825bb29b9ee30c55128a1de6352f80bc +size 827768 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index ccda6c024f..dff13a69a3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:41a7da560d23c917b0be46650e26d6380e000ee1f70bbe06ea4217c837ce99fc -size 724950 +oid sha256:e1d758d0dd00b3b2b12f7515c2d4a6536ae3eb9342001c8e4f468c0363b4e9f5 +size 725048 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 3b7e61753a..15e0466488 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d2c1b04824f88f24593d863f951acb83895e8265895e039add85eda2021171c -size 799894 +oid sha256:c475ec1597d0e8ea7d6f95c1fb078743f32a4d7ed6d5ab6c2415979be9d0cd03 +size 799994 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 63fe0d9afb..fe4df11513 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7fae387719313d9e47b2da02c9a7a04d39824004461c2fbb5b3f76cfd7fbf727 -size 697964 +oid sha256:50f7cf192089900fa6004ba2d16ed13038c13590f77729a6fc5b7716f15efdd0 +size 698062 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index dae3d141ae..e482efe387 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4e9d3b4161449a9f8241c3d3c58e22cbdc16a8e1c11778967cd6d9021314368f -size 648156 +oid sha256:00654c51df95474538b7dc08ec451224a574dacf609fab26e480919d40936ac7 +size 648256 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 7d356c88d4..a89686c9d2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f82485b0ba91366b6b2e08375b9acc10bbc5a813b333a4497c7d2416471c2dec -size 562949 +oid sha256:14296184c1bfa330dd108a9c22fcdbf06ec92620132d27d1860871f4b21fcc2f +size 563047 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 6bedf4765c..5a23aa3b88 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:027595ddcb75ef3c2828b61736f0786441cef7521d56fcf168e272c6d0b2b6a2 -size 687992 +oid sha256:1a1207f01cdb1f716242ab28d0d8a2bc6508c4fe8c975dc7260f5f10ec2c3d33 +size 688092 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index ecc04f3807..52a489d20d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb33c989a919edea3e8011fab814f350ffb8a0f79f0bcbdb66e09ea4cc75618d -size 606535 +oid sha256:3f6c337e116068847d771eba52296ace0d2d4e5e0ea3a99192541839d38599a5 +size 606633 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 1899b21ceb..808b77bd6b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a844b6fef220cab254619aabfdc64348653261463dea5e24d43a0d421c700b2 -size 648154 +oid sha256:aed6c291fa1e0314c3e8fcc6b053a501f1d7b3000cafc73a5b100437aebeb0b1 +size 648254 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 1f21e759f2..a2c6ec30ad 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fa78f372c094ee4bfee688dca2b09b3e4c3f0b60d60bef8ac9ac30993b8d614a -size 569557 +oid sha256:5f75a36ba1f9c067b0c4943910b7dc53da95adc851fee0ee3f6b06e47a204fff +size 569657 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index e4204645ef..7e8bd63f4d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:352bff0170005aa7fad92cad26f124e9ec438e659bf943aaf654ccb788970f58 -size 693270 +oid sha256:f71d9e6d72465d323e0022838bfd7abfde83c97771c5580351ba2fdc6af6b14a +size 694158 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index e6b744a150..a36a59a63b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:410b0896caec4144e452121f30c4132035f5fa70e3fd59daa4fd4c0786912673 -size 612057 +oid sha256:0a51e69fe94cb2b9ada76427a4b12de6429bcdcc3bdd9b4b672d6ddefc3dbc20 +size 612157 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index dbfe67af98..4c3769260e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6a321454f7cea2d2a92726958e368e24bab8c395b1cacabf46542a5341548814 -size 715184 +oid sha256:1ebcb91341429e9df6b69b62f05d9afef6185448bfc297adab91bbd13126adc6 +size 715282 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 358aba9b4c..146a497d53 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f83e3a50c73f1344c5ba5e8c034eda618f08c07989f06d2b2b34c1f653a50d3 -size 631900 +oid sha256:37ee3078c8bdf9c239e541cdfa0663ca24e951900fbeb9f2d162f794746faf4f +size 632000 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 2c17f45526..6a19400270 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6a22169190d4f74d9880eb3fece9fbc528fd85159a80f6520d76deb778f2d02d -size 756056 +oid sha256:382db154aa9cac30e15f6fd4329fe03f1cc7116d773ad5e08852c05b83189416 +size 756944 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index ab563ddeb1..39da088d61 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:92b82729caa6d2fb81405ceea8bd40527b72f110da66364105c83d6887c06588 -size 677064 +oid sha256:565931d27ac1143ec1cc84cbc9f540b84f0770aee8a7af9ec7485ee37c70a568 +size 677952 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index eed430ac8c..3fd2884514 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e55c784aeb057f376af9a3d75fe88fe4371d0c8d444079c79fa51cac6f959bd5 -size 735972 +oid sha256:718470d721e48f3fa05142199abc20c41d9d723936026bd9af5e5381f1e63c82 +size 736070 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 959c4e0c21..6713aa1612 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27625b69981b97655d3508bd12fd1dbdc50d7c9f811383ad23aba0f0dd823100 -size 642922 +oid sha256:af089f64fb9cd5a83d26b73f53851103453a3319efff38af48ea4c8c22138682 +size 643020 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index e3c26097c0..9e7a523111 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3397f24a7f01163ce0490cfec3793b25663ebe040ba161ea2024f31dd8772030 -size 774230 +oid sha256:076a8e824ed1a42d42a19f6c2f87936e43bf00614de31b66c689087a12809f8b +size 774328 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 9c53de1871..42fc6cb38e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:38184420d5d265d8d774df15b1fbb40365e0e3f8844664c21aed16680e6de139 -size 687690 +oid sha256:903c6e014ec230c1a44dfd3f26e0a0d7642e0686e145e4f7bb24645304fc5fe3 +size 687790 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 85cec03a83..49246bd87b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b9f5e5a10af6f2525f3e84c07c2c79aa640ffac947dd70a7356570c931bab71 -size 728818 +oid sha256:2b13c30bf7494ac82f9164ddef2c310e47a49c851be17a3c0721a1e46aa5d2e5 +size 728918 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 251957590e..5d1c5e7dca 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:78ae88107367d5971669551c16474e34f7754f96ed5fc9505075fec505598fd0 -size 635818 +oid sha256:4f0fc67eb7dfd1ec786cdaeab8c3d7d0971c5b6fb3c9a2c50f2b9189685a360e +size 635916 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 7657eaeafd..7fc728b2e7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d99a3af291fcdc006ee36161d2b6ac104effc9158465e3695cc66e6bcc62ec3 -size 767076 +oid sha256:cca6a561a8b549e9cb2489eaca22fcdf925bfa8c5101f5bdce59933f35783f07 +size 767174 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 79c6bd3edd..130446657d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c2b25d1c5e92cb88c899978961bd9bb30876b663f2b5f50f63ccc8072bbe1689 -size 680538 +oid sha256:be29b303bad75093fd3a989852f97b3d679e483ec9bf858acd5c89b625451a4e +size 680636 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index d7da2dd849..5234ab1eb8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9c6232ed037c76f0f58fd2d2e2d3fd71e67e419877309ba53dbc3b725612b7ba -size 731776 +oid sha256:b0ea3e0c7764a77787b35d4bf08937ca5af06ff6df71ba19d74327f753afddea +size 731876 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index e0a8edb2f8..e39b8ebfdb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e3e86f9311574ef3afc914291314c17e982a1a495aeb38b4a7f05eb9ef9dcef2 -size 650664 +oid sha256:feac56fcb285f1a375faaecff6dda1442e287d8f1174b5d7017b136e31eb7002 +size 650764 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index cdbaefc077..e0fa4492f4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ed5d59f0383d2aa085ca1ba189866e6d9b871b066a47f19814133828fce590e -size 777138 +oid sha256:bd4595deb2ab9381da060d76a67f96bacf616a885d3dbc5b87d2cce72dcd0d8e +size 777236 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index 1523128eab..6c18e02d5e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4157abdcbf021742e049993f0e0789cf05d58d87933785afdac2f4de5586da77 -size 692572 +oid sha256:ae5e9579be741ac42be2671342dc249153efb5bd12dcd8a22fb5e033029709c3 +size 692672 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 67aa5d1627..2626e841cd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3e85d3ae58560212137f2c0f61ae1ab161556e6a44e01562674fd13b6ea02d84 -size 724722 +oid sha256:1fb5677c9c874dc1b0f2d316dfc349a5da5191e976854446302131e5e58c35c3 +size 724820 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index ba40d6b73e..a1b84a0b16 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef2713049725d00be8c967b0e330c886aa163633ca09a34e813501eb5ef987fc -size 643512 +oid sha256:de08410a365952d5827b1a7c434f005a7a4673dd855e227f56cf8cb80fd3262b +size 643610 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 1d21ba7f95..2c33d09706 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:76d2a9fe51f302807b4b88232d5a148638e41cca34d2048fb5d3e03ef175f2eb -size 770132 +oid sha256:c4ad9b2a0f1fd2f8ee6793eb4f3f4dc10d8c160d79f214ec2f0d739dc0282b23 +size 770232 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index b830d86e4b..35d3435a36 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83ef2c67fd8ae795deb6a9f6e38b22cace0805fa263eb5517170dca57ba59c48 -size 685420 +oid sha256:e7ab673efde0f136cfd1534345b310aa4b9b6921265becf11a131c0b8f22055e +size 685518 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index a28eb0409e..830ee6ba4e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9af08d8a7a348ecaf77862218e69c41511722797c89215c87f67974c2006af21 -size 804922 +oid sha256:3e03bedc08d8fbaf784d81bbe500a45adb6ebbb67b0cf65773edc65cadeb51a1 +size 805022 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 6a34cc852e..6c0890dd54 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9c7a6f56bf397c2b4019115f76185ee634716c0146249b3e353631fc663f5273 -size 715078 +oid sha256:f06af61951fe9333c398f7628d5d3ccd94a00d1485bd396004f6d212ed5d4a28 +size 715178 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index 2dcc535f47..97224938c5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d20a86b3761d9556567e405303e570f762e0fb03dada4bc83983a14e3e897e0 -size 843278 +oid sha256:0e98e3bf2a1ab804934ea43ed82364fcaa95056543da9192b8cbe57a39e74091 +size 843378 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index c6f99ac489..0e0137750d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc93644eb7fb5e951bf3fa449dd9bc1787ddb7b4e5312e653b72d3d7f83a171b -size 758664 +oid sha256:0e3fe65a8f419a0a646d9e7ddf660b48b6c0c62e6d337ad37615510426caf652 +size 758762 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index a92c4e884a..4fe867c04a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c87b7131f67f0429b7b512c8a17aa17bc5093bf2aaa622653899895ecbe32fa0 -size 797770 +oid sha256:812a0ab8963c9e84816449c77065f28cbccb0a87ee33cd14cea1ed10ce1e5445 +size 797868 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 0894eb9449..75d962b695 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:11f9fd6091c5063b8e9117ebfb42dc80ed5bcbadc222da9e7c2064ec831b91ef -size 707926 +oid sha256:5b800004137c2408efb2366bfdbdac4b384a0585784331aefca40d5854415714 +size 708024 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp index de9f199799..008d11da73 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b732dbc789fa1d15d5fb949fd176eaf6e313ed7d2cf0ac9ddfa87dffe906fffe -size 836126 +oid sha256:cd6a615c61688fe0dd8b583cece9fadd408c63ef5c78eb3934649ffa3d0fba59 +size 836224 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp index a9c3388ee6..dd89850e5d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef3d90eb13f8d1a43ca5e242c4fbc36cead397aad149e3d2cb2a28de2fd1c456 -size 751510 +oid sha256:077b42b0e51dc501fa1432df87a73e91c6395f9973d9b432d51abc9b65fdc225 +size 751610 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/kernelMetaInfo.h b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/kernelMetaInfo.h index a500359458..5db3b03164 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/kernelMetaInfo.h +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/kernelMetaInfo.h @@ -25,7 +25,7 @@ namespace kernels { // clang-format off -#define TLLM_GEN_VERSION "39294c33" +#define TLLM_GEN_VERSION "78bba3de-dirty" #ifndef EXCLUDE_SM_100 extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; @@ -159,578 +159,26 @@ extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunked extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; @@ -755,10 +203,14 @@ extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32Mul extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -771,10 +223,14 @@ extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64Mul extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -787,10 +243,14 @@ extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrCh extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -803,18 +263,34 @@ extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrCh extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -823,6 +299,8 @@ extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64Mul extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -831,6 +309,8 @@ extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrCh extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -839,10 +319,32 @@ extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrCh extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; @@ -867,10 +369,14 @@ extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32Mult extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -883,10 +389,14 @@ extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64Mult extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -899,10 +409,14 @@ extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChu extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -915,14 +429,34 @@ extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChu extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; @@ -1079,6 +613,26 @@ extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512 extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; @@ -1103,10 +657,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCt extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1119,10 +677,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCt extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1135,10 +697,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunke extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1151,18 +717,34 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunke extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1171,6 +753,8 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCt extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1179,6 +763,8 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunke extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1187,10 +773,32 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunke extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; @@ -1215,10 +823,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCta extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1231,10 +843,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCta extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1247,10 +863,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunked extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1263,14 +883,34 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunked extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; @@ -1427,6 +1067,26 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512Page extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; @@ -1435,10 +1095,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKv extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1451,10 +1115,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKv extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1467,10 +1135,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCau extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1483,18 +1155,34 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCau extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1503,6 +1191,8 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKv extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1511,6 +1201,8 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCau extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1519,10 +1211,32 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCau extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; @@ -1531,10 +1245,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvV extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1547,10 +1265,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvV extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1563,10 +1285,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCaus extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1579,14 +1305,38 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCaus extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; @@ -1611,10 +1361,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKv extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1627,10 +1381,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKv extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1643,10 +1401,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCau extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1659,18 +1421,34 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCau extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1679,6 +1457,8 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKv extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1687,6 +1467,8 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCau extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1695,10 +1477,32 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCau extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; @@ -1723,10 +1527,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvV extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1739,10 +1547,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvV extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1755,10 +1567,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCaus extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1771,14 +1587,38 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCaus extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; @@ -1803,10 +1643,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKv extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1819,10 +1663,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKv extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1835,10 +1683,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCau extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1851,18 +1703,34 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCau extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1871,6 +1739,8 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKv extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1879,6 +1749,8 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCau extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1887,10 +1759,32 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCau extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; @@ -1915,10 +1809,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvV extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1931,10 +1829,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvV extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1947,10 +1849,14 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCaus extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -1963,14 +1869,38 @@ extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCaus extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; @@ -1995,10 +1925,14 @@ extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKv extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -2011,10 +1945,14 @@ extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKv extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -2027,10 +1965,14 @@ extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCau extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -2043,18 +1985,34 @@ extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCau extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -2063,6 +2021,8 @@ extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKv extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -2071,6 +2031,8 @@ extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCau extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -2079,10 +2041,32 @@ extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCau extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; @@ -2107,10 +2091,14 @@ extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvV extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -2123,10 +2111,14 @@ extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvV extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -2139,10 +2131,14 @@ extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCaus extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -2155,10 +2151,14 @@ extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCaus extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin[]; @@ -3002,578 +3002,26 @@ extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedC extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; @@ -3598,10 +3046,14 @@ extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32Mult extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -3614,10 +3066,14 @@ extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64Mult extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -3630,10 +3086,14 @@ extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChu extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -3646,18 +3106,34 @@ extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChu extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -3666,6 +3142,8 @@ extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64Mult extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -3674,6 +3152,8 @@ extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChu extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -3682,10 +3162,32 @@ extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChu extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; @@ -3710,10 +3212,14 @@ extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32Multi extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -3726,10 +3232,14 @@ extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64Multi extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -3742,10 +3252,14 @@ extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChun extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -3758,14 +3272,34 @@ extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChun extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; @@ -3922,6 +3456,26 @@ extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512P extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; @@ -3946,10 +3500,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCta extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -3962,10 +3520,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCta extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -3978,10 +3540,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunked extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -3994,18 +3560,34 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunked extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4014,6 +3596,8 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCta extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4022,6 +3606,8 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunked extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4030,10 +3616,32 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunked extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; @@ -4058,10 +3666,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtas extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4074,10 +3686,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtas extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4090,10 +3706,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedC extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4106,14 +3726,34 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedC extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; @@ -4270,6 +3910,26 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512Paged extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; @@ -4278,10 +3938,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvV extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4294,10 +3958,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvV extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4310,10 +3978,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCaus extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4326,18 +3998,34 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCaus extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4346,6 +4034,8 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvC extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4354,6 +4044,8 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCaus extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4362,10 +4054,32 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCaus extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; @@ -4374,10 +4088,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVa extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4390,10 +4108,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVa extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4406,10 +4128,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausa extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4422,14 +4148,38 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausa extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; @@ -4454,10 +4204,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvV extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4470,10 +4224,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvV extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4486,10 +4244,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCaus extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4502,18 +4264,34 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCaus extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4522,6 +4300,8 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvC extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4530,6 +4310,8 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCaus extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4538,10 +4320,32 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCaus extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; @@ -4566,10 +4370,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVa extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4582,10 +4390,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVa extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4598,10 +4410,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausa extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4614,14 +4430,38 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausa extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; @@ -4646,10 +4486,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvV extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4662,10 +4506,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvV extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4678,10 +4526,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCaus extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4694,18 +4546,34 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCaus extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4714,6 +4582,8 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvC extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4722,6 +4592,8 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCaus extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4730,10 +4602,32 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCaus extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; @@ -4758,10 +4652,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVa extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4774,10 +4672,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVa extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4790,10 +4692,14 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausa extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4806,14 +4712,38 @@ extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausa extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; @@ -4838,10 +4768,14 @@ extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvV extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4854,10 +4788,14 @@ extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvV extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4870,10 +4808,14 @@ extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCaus extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4886,18 +4828,34 @@ extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCaus extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4906,6 +4864,8 @@ extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvC extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4914,6 +4874,8 @@ extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCaus extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4922,10 +4884,32 @@ extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCaus extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; @@ -4950,10 +4934,14 @@ extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVa extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4966,10 +4954,14 @@ extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVa extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4982,10 +4974,14 @@ extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausa extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -4998,10 +4994,14 @@ extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausa extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len; @@ -5748,2846 +5748,2846 @@ struct TllmGenFmhaKernelMetaInfo static const TllmGenFmhaKernelMetaInfo sTllmGenFmhaKernelMetaInfos[] = { #ifndef EXCLUDE_SM_100 -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "6afe158b241c60a05f58ff28f97112521c155f8f10929a5bf1a1e3c4762ca76c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "3c06f429843b24a4b032897c829659348fd66ee12dd6574ad19e47bd09178046"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "b9edc40be90241c8850a5db27314dff318af4f31e9ad0cc8f5a8f6627116a98f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "5046327aec5f9d7d8a3134ed2dcbcdb30a70f21b7886903baeb938e8cb4a9c61"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200808, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "8f3f1a1c01a81317f280b774f07e0fa4d26c35c1430ac6d26287415f4274b6ca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "4f62d08da8dc5c7e596af92f5aeb969d0a11c0916268e3e0d7b84d21436f6cdd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "0347b8f6b37cdcf52bf8d15c01bb1ae96c12f719da138e22a10b9cac5dbb4bcf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "5a1ae827964693a4e8d0a2baf4825b6c82ed1b1570f146c08c89d484c6e2efa8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "8d2d811791325a5f93606937efd6fbef86e1164626def34952e22226e4cd9f68"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "46018a3147f776e5795fcc8fcaba6a10260517410d32922113a43165d9592460"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169152, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "3b38078742f0e4064f516b8ef1b00715fad94b5247ce378f6a12eda3a0e9a8e7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "5c8dbfc095bb7666a38945f3a67a1a5c7ca7b59fe61a35a426867952e9b9d0fa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "5e9dbe4faa95a07e540fa0f693ea8bc5d2d13b0083246e23303deeab5a22fcb8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "f1336cdf2d10e317fc798f476428de1fea5232e6a523ca2ad8c4de07c8c5fe03"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200808, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "9568b68470d9f888ab812d11eb05143a7c57d26dfa8a8b9b3e768707a7c1ffbb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "74045a28b86b9a0cc69867ee1880681c7197cb0a0ae619171676f891842244c0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "1e66fd072acdf0c3010468cf26d58141891eadef455bb1b3b26fd7410f5de468"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "02dfc64670799066b81174909840be9150cb379a6d6307c2586b985d6a1fb101"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "e6162023daeb08328956085de02645c6ee6fa08b5e6ff1cb746ab18a35b4d97d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "b8ddc55195960b42d803d1c98634729e4494e2a393517f8f24931d29c6f8917e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169152, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "93633d618552acad15003ef21a52a6bd914623911c2f394c889465dad8c68c0d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "a7da05dc9ef8b4cec36522403588675ab67d35129e0495f256d195b1361ba459"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "6d7030d5cbb988b4e69f63dcb5d8fe4b1c9aa023a2106ca7ff9931cf80c255da"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "11e719ef46c6c486afd7ceb1424c1c8b28ea14a3c2111894d9915d06d69daf49"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200808, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "7fb0a4843f0c131956348679dab725e00f0c1e70629640af18b34761abde2c66"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "a8cfaca15a56f889fca4f705a78ea4407527881f510505caa091a9a7305120f0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "314c23ba51a316533e2393200c29ee2f03d0cd47bec25c2fb2bb4bb2d0e6f536"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "eae92294847364e2de25924c84db1ff2ed8b7694ff0a8efc2694a8237cf903d5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "62b6d8f4befe5166555926d5e42a079a241a7158b5962d90923910c84bc92489"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "5225ca2679dd22d30ccb2219fe5e9679ebae1ec3bf8672bff911029cce588e3b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169152, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "b1de6435598e47c44d373c5cfbd3d4811bef4392dd647643b598f4859d0e4bab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "5d1ede7be717a41805a7c481fe80d93dce7a797e3f3c317d3b86dfa993c28213"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "bf180025341fdc9b4bd727636d42580f8c33ccac45bf0cbaafbfc5cf3656600b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "ca3198651c2dcfa606e83c5f46a8516fa8dbcaa69c6b2a1aaa330b7c8a0db5cc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200808, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "6658c4a47d0913284c3951079e8061b976852f38b271bac115585397d13e5120"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "c46145644b9cc0ef3206e0123335c610fa89b572cc0521f677284f7f54436b63"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "ad0cea0c6be1c27600a121da1f4bbb2db0782199b0161db4619f9eebd16a9596"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "0c921da2d6eccf486b154ae3a68740531269ebb20e09d574b8a9ba91302da298"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "6d100ad1f3228fa492a606b1d71bfe6013638a8d01f4fea5faa2771a3127ac85"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "ea48594675a7512743d68be3af2acafb04775a32a6fb15aac8d869d0ebd17f95"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169152, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "2f445b5cd78c7bd34a851cc2d0143601db766df04b0c12e4a92d1e7a422b1503"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "82bf3ad14e99ec1a5c9a28790ffc3620c9c187d66564f7b0466ac6df4216b000"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "0a101212aa7adffac341beb0aa0dff7131528e1a2cb0184f80621bc43372f463"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "f12988404ae65bcda0c722f94e74ef6670aea187afc8b756c6c9c1bc0c037992"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "940a3d31796d3c40ee020321f229b72f26cc149d4276e969bf48e736bb73ceff"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "4b1576e699a1ff26f9d3b1dd2910835f994c9d0ad9c09381a8b95220ed0a9c9f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "e89d1d8c2ca8a2717b0c74a9fc641dd457f258396bfab0423a9a093ee712d7d3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "6b056d09e19740f07e728a43fd5fd6c9f14ddd34e0e6e1ed9aea8bd331498ed9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213608, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "dbde0c596a14aab8284622fe76c57c39bf3091fc5a25c3e196e6c95e6e2d3885"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "4ef711abeacf734a3d8d75c130424cf3b8dc2fd5b475dabc7b49ae32d12503b7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "0de40bcc8bd23494a1bb218de5a2fb2cba198cd4794550490a148b7f176df01f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "d63e88570bc509b20b9111e9bda081a1d405353624e5be1e000d442e2ee07acc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "24ad6fa36f8bd5372fa4dd1b9f8be95f8084c0807c097e057a74b7ed73383138"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "3f14950bae2122aba605db515d3c1065902c44fc587a928c818ffa39ce8ea71d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "f32551f858294fa0909fef72663a9f074263cc7c60e146c46beeede5c0649f2a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "e71e8abed099111c05b679a49182661c8c48c5c2a460e3e4859152aaf047313b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "33ee27edb0cff1d5a51d6bf1661967b8b6162323b4818475322c9f3b13eafcf6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "7989c43804f6d734aef59649497e5c2e39b38d9ab7d597a22e521d3d2b89c93e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213608, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "a83296fbb069cb4de90109b63ba34e8cf3964cb74e98ac005202089e4254ad42"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "33f96d1976bcedf1c104e6d7d9381a2359bf7b2b0423d48d5c50c7bca755ecf1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "b685e079b5d6db07647e23a591629bb55606ef78288bb1a7319735f87a47beef"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "487d32995973647dad0040abb8f4ce20c3b50f8bf57a228327edf59f40fdd746"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "49d6292da76e46c6c77b72bf6f8bf3ceb985331dbe7bc5321a21c5a2a869e0b0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "cd757e82acea71564690bd0123f9671440c4f862f497fd19d51dde0acefb3650"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "52bf7a1597216f0e2a661705d0a3f42a13871f7913d4229ab91fa99a6e24b39c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "b3350eb3cb33f0481c620cea14d4406f359e8b62110a2150eb79571eb00361f7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "e4a1123e7a18b66541fc59e1d6e472ee20d9407ef6c4070acf153fabd887f75a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "3e924ba022c425ee9b644bf94832c6124eef586dc95e763aa16cff62056fb7bd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213608, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "410b197a06b48d05d5a7cf754186fc2b28f2d93c5315103a500e8add1535a8ca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "89fa8a483f9de3ca71a59a816588585f79f35ae1644eee0bbbbe4c96cdb2d5e9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "5cce6680a1cafe8be1dea7a02409c9606667ceb64ae21acb38a61f7db0fdd43b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "d0fe8a738bc11d13b62100c440c83ad0f34f6b093c76427134b7ccd52474540a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "e61a3f1d4b0b6d8c16344b1a0afa48b1752af0b44946068a221acf851a7d11e6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "06e6aa67a9ae2edd5925661f86bec6b761b36af6fe31da0aa6691b00eceafe31"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "b12e03d84ebb785302df539166be947cee322cb5d488b9b75dc18a25913f0ee5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "8c4c259b29eec91ca2bd26bc4a4fb8514c6bf94a55c82e6cfcc79fb15f3cdaa6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "428ea8c21cd8481c321fa731ce0b514d0a83b1b6c8bc6225ebf07f0b976543ec"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "7af8ab639930633d481f1d44d00e2f0d060f5dcb1b3310fcba7fc3e1c49d6e9e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213608, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "63597d3562cbef30e89cc0c2284ef8502b05445fbf99ad402edd7ab696999c27"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "8a64b33d57735a882617e51046ee628bf8208ac9995e3b7d3dff26ba7a2c3645"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "2187e2b520c7a8ec442ab67ae0999091450e20ccf995aa39f5e2c92cc7519737"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "158e7ecae8c680b3db12624b0494007c454f6fb2f771c4503098a547fa7b0609"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "9fa556ff121d4f8c408747e9a259b0f7917991aefbd1afc6a86e1f0bc2e01019"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "10b78adbb808135e0cb8b9bb2a7e21615974e42cbccf0758abcfc93b282ef8d1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "07f2e687dcaebd2f623bdbbb7d168f77f4365b494e57de32988063a1e2fda290"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "08a56765d6d6bc50b2971ff88d2aa6c669360204665dcd0432c4c7afe18b88dc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "ba2bda08d190751f1effd8a346ba5afd360c94390c8cd91ad816f44524ced4c3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "96a5fb31c4a6f28a62c60ea38d4c51520673fac327d1c91c600ab05b521eb096"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "dd8b3b37fb742c9d5359e1dbb73f1406590496c9390ca2e527c4217abdf53a63"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "4f23a87e5e3ed72e3a11aeade716d9350bb1dbb0e77a5ca41c4351be9d3e3a3c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "3a60f23b95546c588bbbf5926a3e65c661f44bb8f1232ef7b05a52596625a866"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "36b13d3dc07000622e0432510c71850d9a677dd124ebfeaacbd4ebc1c870685d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "deac16b405c94d3c9c8c54275289e8db9bb82a2604719b39c53aecb2c8cebd59"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "f74a3f385d99432494d4024f2bf6792c92f5fdc15fd8427bb666dc11cad7a7f5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "abe3db0237e5da2a253bafa304f88615b6d4054618112f68ef248d1536e9d6ba"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "bf23eac6b4aff096902940bb39c13077bfd5886038abc3bd9264a01b094b6dd2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "55422aa4ec359ef59f511fd5b8652f8fbd04bda2a491f8ad59f1ef85e760212d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "04b8d9c60084753a386fbb4cbf3a4f924ec313bb1a6fc7ba9fe6b7854f10e02f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "d42c18374f72a74c19de3ebc515b56c2cef11683e8db991900815fe249f33773"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "a1aeaa9708c24176385c2edab763f434a34e6c67ba15ce2ebe4749d377d087e1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "cb464f3e0c52a7160cf73944412cee2b7ab8cabd4b4e1033887e79003550402d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "b00953628ad614d2762ce56027b5debeb9975da7a33b669681406241d32b1d62"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "8157fa30c95c8629b76285fdb82fac6036e03245045ab56b7bc4a5006103a4b8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "f7c3e5bdc0f3be48ec13530bf33dcee157890f80086e4357aec7e75d3064d170"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "19e14e6fdc3af43b7afe1400d34e42d983e1698480149ceb7982c9320ef062df"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "a313d83488f83a9990b49449f2be940810e722681018dc3e22edfa1679567ddc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "a3209e54586383acc76350fb29a284b41f16aa0dc901cfab21ead5f8558f0a1d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "a7d68fcf042f125b8b3bf32b0f8cde71814f33cef33c4cbeb173db050b82a439"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "4fc7ff8ad5534c7e5e9466f95c66deb77d8f6f6c006577a81d02bb55fabc52d3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "b4a894b87b4231d29e188c512c7d2a845c1905cb16ee2859a146f8e57af3ca37"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "fcfcfbc48aedfdf7e3900b429cd6a1454319a2207ad5aae52aff6e79f340a639"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "71afcb1c493f040ea2700e5f5f74d7e66878c433d53a73395e5d7195fdcb2078"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "16cd431ab1a7b67080bc9297f35fef1285f39430868becf95f425bcd992a41d7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "0e89b284ac42ce5c7ef71662e5c800be5690d7c515948d86695bf67d575c9457"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "a1310d61ecec8a776e599abb4e3d3d51872c22946340a9deb5b02d2724da63e4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "71711bafb1b8332a82dfd04e4f6e32b06f3961f90b5497d4b45b8494ee2c0749"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "881e59bf8a52a430dd877d65c17862b079603c39f8bb9cce5670c8552f804353"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "4c321d91566a5dbb657a8b4b728f204314bec70dd3f44d47c7b7980cc0dd6caa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "6f376f20d0177961cdae1bd6b8fb9975fa10bf85e6c2896463d213f7e5c1e968"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "dd88e7f9182677942da5b512024f0e89667adfad454878d8b495e930c77afc8e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "169955b8973431ed444a1abed17975a93c4be4f76c37638f1ca89ee71b55ac97"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "b709fc158dba0dc2fc0521922a680681c5c84f8739b77497f18901c885986102"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "8c53d0efddf5e0bd3f3d85971bd130dfeff9dd939102be73bfaccad36fbfa4ba"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "0411e025acad3bfc9dc694f2c3a38e04ae5b926beec8c6fb0ab744e67db81743"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "28f6ea1d8996b8aa2eed9ae4bcb4a53ab3cbec44d7499dec3140dc333d553c09"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "0a34fd0810afb9315994fe719668c0a54e1ae709418e99cdd401f9c43a1a93b5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "babd24edc78ce8ebdcc49cd7346af0355f98d6c921f90ba61b03a6a8143f82d2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "e067e79decebc0bfc18d1d88ddbcdf548fe0caf0007017e58ef434e2fa52c4ae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "74059e57273b71e3cb2ac745a27c9186e6426157e527b318d62136baea604e5b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "574ca755575b378a02bebfe7b0a409c58cf78ad6c399d251eb842ffe2a88edd3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "5285a22f8f9c211d488b6e3c3b1f1f194ee0b85320e983de5fa05f3fea916036"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "d79fbce000343e7d549a1f267b8bedbbc8c3cfee015009a9a79b90a8d28c9965"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "8bd799ceeb2e768eb863daa4c3ad477956eb1a03a3ddc54b26f72d8c6554a6be"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "630120b85c92c22555473105c8e836cfb0f7cbaa4f96b3d9ba13863461aba9e0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "c51f74e3d07e17c1d5be993747838c965dd5de96155a275bf51f4c20ee66f20e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "e93048fd5aa3598df555c0841fb9e9f983b82a793ad803f6cf6665d2005d9977"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "b19f3c58bbedddd6472fbb8ad81227f6fad0a48757107d6b5ae05663aefe2f30"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "98faad5669f25df5bfc1e048883a4083cfda3eccab6a7f276035b0dfd93f30c5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "5a2e938f1eb13394a6bebc01be20a284adbf1dfb5c29ae8b4ced12265906c8f9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "e0fc1e5df5791a477fec28b4f76ad57bb77e5af342e300d6a091ff4f088e28f4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "17d8e849732b00701223a72ba8ac81a2ae343c73e41bb6f7f9509e83a1d53e9f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "4c39a749ce7e58c1c9abb233ce6533fe1097a7aa25782fd0de13eb4f57fbd873"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "14314dea06f0154d30f0f56eadd422ade6fbbc1e3f31a174722ba854488a0917"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "fd5edd6d5cfd68222ff9b768d2e4bf88d25eb286f9d7ad29fd5e509d9b8f5dbf"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "90c6cccfbb3d9cdc37d317dc87dffd63a2559cc74843df49310ec80e0df5a2ec"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "80cbc10e1d3e88c54de842f60bb0d497a68266df1bc42e7cdb3c6fec8088a605"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "b33c39ea05d139ae84e9a5bcc11bcf92b0d1253147233a00e827ca2dc61f83c5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "a31dfa6c0af82a4ee8a0c4a00020538cf81662a181e77180f2a038e144fa0ea6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "3b6ab3cffbc5d443eb474cff8564863aa889e677d309ac116fec08c4668948d1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "00f68a5990ea9dc0d24cc22b2b5c1d8fcd93f52a89a924fec5d5d294f5ee5352"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "cb9120a4008dcdc1e92e9963d920f0a09dbe0d13e003e500cceb890527add79c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "c942a1b767671d3765aea178c9a7a3d80041e06c03ae5db486e057255efa8f52"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "6b13ebf05d9104d9db7dc8e2618cd76c5d70a7a88020bc806632c5191f3d58ac"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "4d12090867ea1c513efd772ba9ca506de1a4be880cb3c1e8f5343cca14a4bff4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "f44103a5f1cc46d60107893304fd2c6bdcca1b712bf9ed28934e2eec8b87ed7a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "dbe8aebcec5ed3ac49b1aae01f4c30b19fb4dbc2f6ea9135cfb141f9cf7546f7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "73060692f4f1d1deafa3acea0f7e80ae61f1db31c8fb286c7d390b6197ef3e1a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "5f41e444cbc0378f8e7bf2f5d6feb49f9a519c2a2171153b39410f13e54113fa"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "543a97030e132f6b70c3270a8c2e979485626a4c120cc3188e4b805ceba0a6d5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "679770f1f92002ca7c51a21e12d8a2afe2495f0cadc873e9e4fe454f042d8694"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "12ba4384bf6e569377034433a2b843280221e79d246c949b868c735a25152163"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "0d710718086aa91f6a28c607334a6317c60df3f4b865b4c28c9b1cafb20577ce"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "261fbdfba813530ad05b546b09dc0447da87b092e2f09db4ebcdea7d15632241"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "b5ad6202fe0cb685e5bbc7b2cc9c43b76b852794d8157e6390b32fad12bb49fb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "5c709994ce27b7c962003547b512897d5a0987bd737c78ddf8e3218c35a3f92a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "ff672d876c75e1036bf337cb8fe89776186630ccbeb46c14423dffeffc1dadbc"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "cb154b53231e763ecd42a171590316b31b74d791e2f90d8cc47f957adddb84d3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "0f82985319335e1606f0685dab0afb12415b8e1aa83c17abe4f26a5cf7e0ff55"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "77ba42b3fd17c70aff135f147aba82c7008c814f9eb5fc4a6478a170b788f2e1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "81107745fc76115b19c502a662bee5c9309ec8d2e87591a61aec1f5eb97ed0ad"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "b5011c5a88e183c1173cd4d347dd20cd5952804489abdd49f4df29cd7a20bdd3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "9ff81f379df7ee1df0b9e59441eb2969f732f4b407f40614c26c3b32a21c68c7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "4a20ad444ccf6b3cb1a521f4c24e19d09fc4e229ba6ab5a81142baea733adc60"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "69f66b6c81419d73e20bb4217a8d7def81d6dd4a4d6e60b15d8a21c41590b0b7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "d6badb588340f6c6d6ad7e0a1dd43e5bf7006ed128664d16a636bfdaf6b63f12"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "4be2f3f0093527688f003ffa8d5a18e2c13ed9b8ac181ab1233d852ad684e7e7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "7f341e6f5cf4d481128ca776f0b012028daa04945a8f0907904c7be6584a0170"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "153560cdf7eec6d58dd6f25635510482a939077e4e46fa1ff56ef8cf2691cc51"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "7a1ccc9c07b337fb85ecbb17dae7c89356adb8aaff72219dadf7f6bc91ffef48"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "cdbe36acb3ac721da4ca50037aa0ed2ad5c750cced2607886ba23a6aa29ed1ae"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "22732e3d438027454181713dcdf052609009d11418a44bb0f277eaa74cdd10d7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "a9b00494df0f30d2271af7aa643bfa955cc03b9fdb8dcae42bc721c0109509fa"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "48d8168722820a048b3ac84c9a1d6e6cb7aa8ea51476268efa42c81f6e0d3b40"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "374ae2a276bc583fbf1d4596ee09ca63123897f44507eb57451d72e9ec754047"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "375dd28e6fc693f06f7a7d15a068c74dfb1899027f72a12adfa9c69c5a0e17c7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "0caef06ee159c7d1acd48f5dadb32fd440410a5392c2ecf2eec2c907c57dc46e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "42c96b636fcbf8af265cae9966c3e27ce19297cd966ddf1a9c7ef8a0ab4ce7af"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "fd14090d19b4d5e58eb4185fe949e7d087e14dda5162e821f79351feb6690015"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "62c5a5c7ba1c22345117c7dcf9e4030060da67dd02ab49feade1f2b296a83f0b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "2539ed93c2aa1a7596cb8530319498a9b1a06b49ee020373e23e099b4ff39b8a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "dd4187f0b2385fa280e2c708635e39cb27367f87a55acadf5c842e0dd33d4efe"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "471b99b0d0d32461f72c98c0ea0a370ba8946ead42e619d743e4dfbf7fcf394c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "92776a3d239e4dde2052cc9308a8d210b466a3f8d28bda8d919b04e9c5a12b73"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "e79e940775af1ecc18f8e698a44dbbcfc727fea37b6fb1371c42940600d3a4e4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "c66217a6b1acfedf1f3c02a285a4c7972bc24ee9c79731350c591b5758369671"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "c7697efe24237b5f9244ce92001d36d16a02ee82332a561cb86e69b1e03022c1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "10aa9f6556e51c5073cf2a32acdd410f4aef8c1795d08aba937f64fac0316095"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "f1a2b44bae785e46319790ef1b45c117f5d2eb336bae319aa6d57ec3f6a82ef4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "d15d7521d0bf7c40c8bc6478de2f311ba029217b11924c655107e2344dc8ab11"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "822901858437e9ca8638a13978e9052233b5bc7002c9e5dff0589aa889f4c09a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "e38836551a30b503ac8c44848e686b16d01b12f60bd8db628c9c9180d7c0663e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "403dd2c81a2b9783360eab7f615a145b985766593a819c15a9b9a29e3f424d3e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "4097fc7cc1cb0ea2f44aec25f2f51c64811a073f13bf796ae21c77ed50f88e46"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "01c77591d3244d1332163533d2d283e6826cfa5556245b01315cedc613141800"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "3e6d109da95a5d6bfcb1d0e2ab23369d6a288172261127181cc8badf52dd4da6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "2a2f823a925c232a6cb3a179f1cd3d287a9f01dedfdad60e7e396b5d74eb9c95"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "d927c1a273dc77394208c4198910731b2e731f541371f68c70150b56fc319478"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "342ee671c97ea3cf7b81aca51e05d4ac3bcd57986eb892c46d7a98659d98e6a0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "b86e02207890ef1983f5c78b1d1852c8d5ee9908df341d4c1887a2c4c1278c5b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "f1d511a22b7bf0fa443938d087529b6f34b94f65c7729af66bb5c41cd6a7c908"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "dd6ab6756f4ff9dbd71636b4607332a4be8052bba0c9c9e4e198a816cd84961c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "716b5f476f40666e6f2961e3e5c0a6024337811f77381667ebd7348ef9c6390d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "ea59d5c2f473ab075b7678f4cc5b23f73d31fc7ebac3ac193e3268f21c0ac378"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "536810ba5ace258cf7f7e6cf20c4ac2fd1653804bfc6b583d71bb1d08435d10b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "019dd9e8007b6be6763f35f15ffd1aa4358bc4e6560a5bf0ba2f7f40a723d63b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "d77eece6390db63cdd7c9fbce2107889af0f1057417111e896c71755e857afb2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "595464dd30668e1de27a763ba09eaf9bd6b18b41daeb0b129c976fe75ebb013f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "d3b70e8cd6e2987f48acfebdded373e572e1d1d7e3211f792cae859dc5f45790"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "51c5c34e890e3cc601cd3ddc1dd4408e50dad0b3c2392c0f0da73af5dbc9cbe7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "207c849dc017c5f65d95312ef47540390a93acfd17d4678d547f64f9b423179a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "beb162d45d5445435397b16f5b4ded1343c25e9d979db40aaf4703e249296505"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "f33c0e72309a3efa1615f2de7f7ee1a8dbf38ad9611ee1371d78aed93106e42e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196976, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "353fa2714dd0033ebac342b9771e9d8648bab38872d47df62dc542230f1a0271"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext", 196880, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "085ad670f12a6051a9729a7aee6642f970332164377a5e38c243a5943da6d382"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196976, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "57822579007a6d45a6e552cb4b69b799cc166989b6b12b595b7bfff7b850c8bc"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext", 196880, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "4f960e8582a91149501c4607cd8672b423ef4c766b2145fb96e224750862c821"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "defb9a1ba30cc945a898fa626afd18048ac354ce7d7741066054c156b97725fd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197744, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "2e4985ae18c77a05b7e7627b28677266a3d41d1930c98036292a9db5f564cfd8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "688a27164daa44590dd6623f9a94493b18f49802c470ca133279bb8831995791"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197744, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "b49a689f3c5eb24c059349a11f2418d43d10186ca02e0f2cb7bffee62e100c60"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "e2b22826cdcbf41a423d07ac3f54a87978697d1b71f237dfd39e7836d6df8a03"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197744, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "223138d36c36e57f6b4c82d9c6f93ba867fb67b273350f5f13301b5137fb048f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "3d16bed93d3dba639e85a3d7e00c7e1d3307a43bad85aea6059ec2c79edc6d50"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197744, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "5d6109203def74d9ef777f23be4b4b688517130b2f348a246c2cd0c7587c48d5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 196976, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "0d3bc36989d8e0cc7682d1adbe4e786478b4d2ab32026d9106dbfb8b261b6bec"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 196880, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "d0fa5ed0204dcffaa217620bd8763513407de72100a8336840e3beaedf790fa0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 196976, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "a68c20a9ad20ca7a1a66a8ccd406fee48a14b354e9fadd1703b82c98db8c7a17"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 196880, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "ae146557967a6c6a26650e30c58f145523826951b584071d549ec6865ac75907"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "e68f3b89fe947d539885059817eaf8e3dc07ed74d19e5c1aa4279cc3b92263cc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "064ab75c65e7c8cca91319be5f6abdd889b70c4bfce380659a4d49a70d778872"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "a6c9a92f1c0fc70f73f0bc3926b0dbf835c4f8fabb6374f8ca2e6a7bbd2a1336"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "aa29812e7cbcd4feb230d77c0787c0a4c02ddc34d77101399923d1007203b820"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "152d170c800063c2faefe322797d7e5cd9a9e7948641e2a9681c399e671875ea"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "eb17c650a9309a3025fbd80d236267868fca352eb255fe2abfe347440c1a8721"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "00a853b24d02f2f97b529ced611f3a3200943be198b32a1592400099501c71df"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "6dd398aaf779df1708e23cb32e1184072303bbac48e39015b3d978d254bd175e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "b80a5e934200d21f3b57c8a4dcebd95055f55c11a0c430cdae279260d083e1b1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "51ab65df04bd6f7a3fac0d344b33d75e770916c3787f5153abdd32b0fd46ed8e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "9a49d798df06ecd99644627975847e2ae9d61729dba7760e7dd96ccebd8a6dc2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "5fc870737d1f60c8c9dc1a78737c67df0112dfbc66817802faa7833998947fea"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "4311afd3d29a7ba576151fef5d4e67aff180277bbac194aee03dcc923eaa0549"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "ec23629c28d9a21451fca1828e9395f75954021d70d2f22a0a7e89f70773f845"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "5f938f8aa4736d0811edeccb16da0d8ecf7b8325c3e8e8f2d93cd794bd932665"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "5b23b7335185c37f25a8e4c112aab17f3ab010ef35e5874f98396f34e0eb7f07"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "c4419ae8474d42a45dc660345b518dffebb0a57b9cd840b0e9bdc5e19f9c432c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "66b857deaf8c47fcf38a0fe7e19dd9f6d46adde6a0954921b9a28a50bdd66d6d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "98945b6256e5bfd7c4521643c08d17bd28b0684fcef43ea60750773e16d7b0fc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "649b913578546774c97e237c4179b8c6e4284cbd2b4a38e4f78b1f656aefc738"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "2ad3fd4906a60f5a1d32ab0be8a911230bfd6e86607af60b471280cce3b8c673"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "284fe2709f8145831623d0c4be44bcbfc0521032a0efd94dd48d1f93ee432ef6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "d0cfb6525f24f4efd5768689918b108080f00e3c95ba6d5e353494094e1f9656"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "7dc335b13726e6626207066eae678d9a502abbd8987628bcea126e7d4b0e8a05"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "cdac527c03bce51f763d2113393f67007265b3188330f706bd3bf069849a1bce"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "d4252de6b51e6baf238837c5f081c04f279967df1696ebcfa1c8e5d34f53c791"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "5886ef3a747266cd923b232dc659ada0cc654ab7c9927e4eddddb4312bb56f90"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "fb6ef96fc49b3c73f85dba367d5c87b5e485411a51393c997be49953ebbe9c58"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "cb70bb2f6bd309aaf4e011ba32f0799fdd29dd811657c223d535fde8c0cdef9c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "37cfecfbb532a8b4c495745021ca2cc9311c9e76b07046c87510c5652dd7579e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "a605d1dc157a4c329b9e5fce4386dc2f57d95aceac62df3c0beb10429e9f7010"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "89c8c87d5075bf2b9367e6a34227483cde06d54163a3f7416af069757d5328ff"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "71e7aecfcf39ceda72af23fc7e5eea1c6224a3b391cec00afe48d75dcaec2833"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "23080dbc68a582cdf5853c1b04f443c5e901c042d3f32ee9b32e484cbac02c98"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "a322071c37ee26ac706bda210104f1017c6f0b9301acf8d8d78630cae2476c3d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "680440a79f5bddf0957a0e67c33806aa1a72b7a224902cde8c3a4e1e3a26759c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "3ce07f8e7bbef03100ce50b400b8ccfab6761d214e056fdd2805a34cb58426ab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "adac948eaee587ad57f8b9c4ee0718c7c9a0548a000bee35bff25c8f71dc8045"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "610ac88bac44b58b34d5499a8a5ac9fcc64b160894ec403eabe93bd2bd5bb9b6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "18418cf4cfea26ebbd8d55fffab35b30843f58fac4c3b6769295452cf1deeb5e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "f16c4053ce53d2098b4208e3d148e0eaee3ffdc576e44a221af8c422f5150307"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "46fbe69c8a1c24eb72fcc66abe1747c7ca147cd1d5c913715d6b19923651ae9a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "ad7247e5bc16cbf841b451997b859358346913c4b27ab96551b70e767c845336"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "89519aa232be4fbd1c38d4efd74430bc1e3c4e0340fcf256d68f1cc09b741268"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "67ea4036ef9b2b37756f562be0a01647452c725c54821e9b8aadff48aeea57ba"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "c2f04a3ea0eb7f1ba4e0e352c5a7037eb52095830f99471ad44e84fa6bc40923"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "231d74dd8de782821266e3e468a73b8c393be162d31870efb0d01c72cc070e89"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "e6ba36aa6886b0d1efaaf184848694425aa7fff00c636e4c56b1e4872f17fb17"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "ddd3a95d35e9b2c3f6f0106f844434ccb47f7383c3dfb770381d789665605685"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "def1462b6d1b365a7e8307d3cafddcba2e41eb7d88c3ead5137370ac699231b3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "6af4949ad6b866e461c33ac4f1a491009f9578b404dcbba44af4ba6659b04450"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "1434d8ec769dd84b24d30f6e07f49a664f20a7df0b036e1fc842b77db54ddb60"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "812a0f879731d1d03825c1eb7a9355c43f84e7f4263f56ad7506c999d2d38d61"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "2391393fe233067a8e867a109a76d2b4e359ee51096e36d863721c4e58507b87"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "ddfd24ddeb92cdb2b9df4206fbbe8b4304bc19347069ebb675bebe2dd0f501ab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "340bb41bdad42a068108ace33d5445e77e10061bb840d0ff18bfc870173bfe2d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "93b3d019b3b686582ebdf03d7a5cc0cb72a3c8c203b1b3c0187ed36b06e22fba"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "7ca9098c44553d99d59f81e0a49cfaa592b7abb11d034c5f0d87a0a14a78d32f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "abeec0af631144be83da9aceff3d29729c6fc6877af7fc016773e43e04de9c5f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "d12880e20fab039eb08da9f821562ffa0dd95aad6440074f44db6e00aefe464c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "fe0c7394355c3922b2308c7f542f4fc043f9bc48a9a2fe5c4bb88a54ac66a2f3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "8cfa557e9498b359d2c2b55776177fc2a37ccbecce079ddd3abe49ff19a3b2dc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "bf7fe6f3a4cde80e8e36f3361ec99130bdfe4ef906c0e15942b5cd9714c0f43b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "e043029abd56b9ab6e36637407a9ee4cb6f6d2e795f67b7b67223db18a51aff6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "0a01bdc01b09aae22c367b9a1b4dd15ae8ee10360bf59503e7c36a20c1078b0c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "bd68f289893a4459009a33ce0701d95d1ccd6ed7e210b96b131c5e01e1aeed58"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "66ca67bbef43f2633a34f6e5cc7d77fc6e4e6b7397cb0d681de038d24086a476"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "ca505c0032750cc2e6d09a73df13967c96aeb674ef7a01c94238c13cf2402704"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "e61155186f492d185c31a32e0d59ae419fff85dd5b3c111722acbc71ab602566"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "355fdd1c67519f4e8be6f0c80cf578f47ad5934ccd0e6902a6c698534d2b00a7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "16e2f42d7fc3b7402cdad61f2ff1edb0b5bdf7f04e92c974b16b28f82c966dee"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "d8884e4bf2e56873259abb56c717f4c490408711b0541a833a610780c462dedb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "0295191ddd8dbf7bda913b0438a75667895e427006a7ae68cf809423b7e7db3e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "252ce14cda15330dae9e01beb0e3d613a140df0cdafed587f45882453a5391cd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "a54aa08d4afb2faddc3ef40c3e87c9bf3cfe48703ad0322359a9d20ac3f95143"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "56eabc026b7c8940b96f02053ce275ba3cc4342d6b686816481f19b624eb295a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "b7b417f02c5e82268af0c48f1c97db4158ba83b8e0c2539f118a455e72f5f75d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "fb227880060d063103ea600466b694152fb0429e13461032ceec4e022407e836"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "92551b2f708a12f117d45063cd9f95d4ff00c0c61fcd49752bdd1f97af697dd8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "faba7ab9fd72f4f73890ac772386e316692023c7ff456f1b88826749c5b03e04"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "aaf4d7aa8c7f644bee74be021be8f7819cdb0f53be3279c108dde3cf85fec58d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "f0f7016017ca1d80fae1c3a1db1c8cd1468542a4cb5b4989ff2d720f8868cb31"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "eb8a321f9c76487a4f00519f364e18b31623bb2c447b57e915c7ca654a096999"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "70b0c5ebb54216e1e078ce573b3b14475ade5286963dd416619ef75a5032a5f9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "e511651f36fca507c4f080736336dd6cb0e6fbfdceabd57edd69af92dada6eda"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "25f03d626fa03b1d95f5e0abb1035a35eb5db7fe2a01cb67ac2d1461e1ae295f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "73d79892fe3f2d648a112fc0a3349f1b6e7de38292d03c83b27c32f7e98ac83e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "7fbde3808aa027ce382afab7ab18ede3fdd9d3529aeaa5b7cea0ab17884b89d6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "7510a4739c315639eb65ed22f5b85960538d2599c4d1f3541d4f642784899d55"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "fe63a9b86ee369eeeb39e2beec545532f4afd57efb60162b6dbac051895b0858"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 115024, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "d04cba2b4a63d32a7c4a85a1d2875dba5764b3221e2eae5a75f4566b0fba19b7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext", 114928, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "a0c49f2231f76edd177479ba6a809009dbf338cb442f9759a69d8699828ef247"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 115024, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "e9229d9fb99e35e0ea2748aa317405bbf7f4df234d65872a19dcdcaaf4209d29"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext", 114928, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "47090d2cf08ad2fba5230909535f29bfe284b942cb42b46e879aabcc6b1f49d3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "79b2906e7e91630488103b022320e08610d61c811f091dc6a897996a8a00106a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 115792, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "c45ab6cb2df8612f772d9da0b0e5acaa87d4e2b5a317df058e5d0cd4da80c825"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "2bc2bfaae38102e91cdefb54236315ccaca36f73f8acf772ecf9f8db6f54cd4b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 115792, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "eba24c7eea7b910e40d58f4d0226f65c80ecb0908e81aa02afd3aac8af365c8e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "3f5b5397cc760c815a50d5af8ed8dc6ea974cfeaebfa6f43ed9a585b3dcbe2de"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 115792, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "f61195c447dae3682253fe5facd14a2c1c6d2972ef10652b1c302fa08a774e18"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "aad0ddc4fd7d5121737b6d10d485d3d6c7f717b09eec99ca3625846e4e787133"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 115792, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "c6473311f14fc68f1ca24d4005912ede8bc693d92d12a90e6c00dda7ee086a7e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 115024, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "938d606fc9b207a7ba1c7dc55970f798923586658cf40bfcb7fb7fb851526ad7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 114928, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "1c79f96e1a8244eb7e0c78a3dd2ccf54e5c2cb29f584981bdd988408c353e143"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 115024, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "fcb9511428686344be1ccc8fb4cbe7bd1be128cc93070b013f838652da5c15ae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 114928, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "36e35e1646c2abbb2fab166e368e1d2e33180c3677cb2a270ed5fbd96c5e958b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "f6661c706d2519f14438599ef7f8d2edddcdbb436f0cbfbee70b78aa7027163d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "884455b073b7fbbfe6792a94356897ab46c081cc4b3defc3afc68b98e443b76a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "7d44b19796a4e3c975253b6cd9d24781d25a386e24137af350948a9234411528"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "c4f932277f5159b01595d208269277faf1d442642da1b65d9488ce2596d20085"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "bada23c6a040a1e4e0b627b3953faa0a4d5654ccabac8ffdc9d31b7b24cf2323"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "c3c4361e1db4076919de9fc5551dcc3919c162d8513b23197f917c64acc9a894"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "fcaa632bf970ad17ed43e3ef4c69792db845d8efe0ffd59317c09378eb5ce8d2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "cb55c876471461a837b97d7c6739cefb198757c9f8575c90d4ea9f5e47ac6a2b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "78b75dea52c36fce2ad5cd5f9a5127e262824f6e562dc23caa1c9ba8c41cc020"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "9ec4f26fb7efee1f04d5a7a16609fe00a75218d7fac6f9528c8e868d8813af66"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "35eb0f1ecd2fa4cf91881597fa7a01ccfcc3ed79198bd423c1ed131c61433eb0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "d4aaef3391675009c7747917867334e752e686eb4a8bf9af646d7901a6758ea9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "ef05b717c86e27b87f0f2d5353d42004d6f4af7155d91d95f56c2b2d71bb40f0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "c9203c13be278f44ef0d0f8d512aae1d3ddf56d444a9f1ed7380eeac5acb6444"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "1758327d0da315bcf2435662306cbe816c75766b76c4f94268a7dbba7bac16fd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "28bf22ae3899138cd70bedb86e97026372b01746a389f5b8e2f597b634c14b52"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "324bd4737e2581fe30e370a765cae214368e3b9828ad8e02cfd2c639b3909391"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "ed3c21cd7afb0803c2f1399d542cd3609fe7af1cb477779b6438b2b725e68da3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "537d96dd181b20293b733cc74bdbeed59e27747bcfd9c6a6bb72b3d547d00112"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "1731f6c6c526fd6d01a85889a59a9e705560233633abbe8f93d601f348418e8f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "39740890815b89b8dbf9e5b765f0cb64dff7d5a230323255d10e90842166e569"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "2b06f7195b72433fef87ecaf9e9c9fd293207d3ec1d5a7347646459b81d7ab5b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "2c8d721c6984df8c7467c583cd278d02e925a48c6b37fc2a78cec3ac4fabfd68"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "fffcd3ac51aed9e600a100de0834d21561b571d185678a1381cfb2a26767c82e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "60d4b0b56b9d3ce05a0209388279f331111b5b9f63b75f068cfc53d45f1097c2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "dd3fcd4c243ffeaa3d06cd07fa18760121e33539b45b2c292a015f2e1d8dc1c1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "950270846e90479174ecf3a360f9ccab9bc1fb239d7d7d267fae22ef8b061d49"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "dd4d77a5ff49adc2f9c380c121e6d6ebb1e83d38b43c962fcd7b1f170cb7c831"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "f50744f5425afa17bbbe3f6f4a19e92a5234140154d90a26a68f5a9b38ad843d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "818391f7e3fcc06f6af4261846fac8e50038b7d5f083f530d2e681d53d72d801"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "879ed4cf1b6949fc37c888580e0f1d072f3f4729f74de12f0f65e4d0966b4f3e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "7e957dd599acdf6e94289472c01583040fd8e9fc29f6bea6e7bd78a10b3be627"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "ad3a1f2dd093e66b1d7d7d4fd91de65347828ab89560866d44c4b888bf042311"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "01bb2bbe45bd6ddcb08c365dbc8bf531c4a8a901b49fc14cc9792f2564e5e3a0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "0ada96ea85f735439197bda7fe82170e127415ec1aec996a7c5400a8288b9dd6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "587df274655035dbedcf817fa328b086a66ed9767e42d67001a7675c307ae02d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "06ebbbaa6cf65c4c89ef48e15ede820fd65dfcfaad557f5cc82faba3961a99e2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "47cba44b53b7db7a18333a28a6892cd1bab617db85669129c6a64d4644e8119f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "68783c82d80c921140ff427dc67ee02f977f1c0f50085a52c10c8f45a8f803eb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "14b4f43018b649ec0adc5739291e96997d4e359a933b5b6080a9df5e531d5449"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "2aafca68831d7defe1565fc4da86be2b352408e16c2853b3f0e0f0bad0738de5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "814bd318861512913d4c00e87b535f585dd338b10f4342ab574783c540315c60"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "43ed05430877b4a62b0fb8363b221bde380a95ed6372c919dcc37c1001a04b5d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "76578ba47a05e1007f74fd98e17ad1050263c475fe15161d677e801a9e3ccba5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "0e0e4b10372a56bed7ca7308f7003ea54c787f62f8d05ebd12cd89d521b080fd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "1babb9db7d324bf214cf8b229983099c43134d22d962544eabd71b857721d780"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "7b90d613127991f2ac26825299ffad8e76c47530133113cac2e589b1c609205f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "dee5ec361c87051de74f9ac1168232485a76dcd89b563b966c1596a0d031dd9e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "24e25fa067d7b01977e7095a62acdebd2d955510c171997ce5353112c66a9f38"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "c6f1e400ed4077fbd927bee33c619085bd262058c35bab5772a4a37dd9d7caa6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "0d17ad4d4ea5df593f22f8474e261aa236e509f94f00590d0c218e07eeb62c6f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "721b3703cdd6b357036fe5dcfa4598953d4dbfe300a4faf72159accdd7ef4974"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "fea40255e3e31500ac6bec903c037d1439c45fbb8a4954d8b08feff522c8599a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "bc5e60757fec6e44a42bc21bfa8fc2e09844600e4645f460258fd052ecb7b6ea"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "80029195a8ca037bdaf4d3c378cd466667527cda8fcaafa69f7c06e7c0e56d19"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "beaef900826fa032fcd5f6d90a6135bfa28c418b2318e3e1479d3f6e5a27f80b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "d01515f5b0cf080586c825b5b9a4137efb8047563e0096ec4226436ee9649aee"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "67b6d8c7b84ff5c9f19f234a4f86fdce590b6e03386e7fac7d82eaf77d77d9d3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "14a04577da7e9fb9f4cda4ed5132933e2db51898b303d09bd5fe383b456dfb1b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "7498a75d088470ddbd62e11ba3416d5b150e4f60c8b7abb1c26de03820f8f570"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "f13915ad870d5f90df541f92b3b74dce8cdcd32cdfbb8b9408179a2db368325a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "d955d766694e1a9f0e9ca7f46494e800f18bebb87f38c00134270e9ccc5601e9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "e6659f1d407f81467c8533ebf29bbbcac09e191cff9331373333043956067893"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "d7c2e8de68fb4d625daf3ddc22e2be9e5f07b68fa19505017ee1fde43e4ffb9c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "d5b3ec82408787c164a297248dd21f2f02203649cdb708d20f7ca917f2f974d7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "42da681a0578fb8233615f82bf004b43d339605935d590ff55d0e7594a2d2c0d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "3694db4be8056fdb86771b001415cc1c8b0b58d9788979de4a159e5e9ed52c9b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "fde0ff15c5870e63facd1c8a5143f5127c02b3d48bb538b0b33145f12026640c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "d4cceee280bbcdae183dd1db58595cf96a9a17c6ee69b16cea39ee69be665a66"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "ddd77e2cfa3dd00e91d9364723cd897f49ebcbf5be7b89218aa39bafaa82831f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "a04c9ed27f2cfc38d2c741423c45a495f85de19f98c0144ac0ff7dc3d2e969f7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "54ee170f1125ed9fec173942835974d25c93624bd07a6b2f6434e2f6127abb82"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "30d8556ed0f0cd7584d7c1a75ee5bb244c37474942521158632abae507cfbb83"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "9cdf8bb3ca7e9b5349c2049ba5a24a5d6c8687073ee88a1c4528359edebd08f2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "c905a263419d2705d8645d5cee1edf31a516727ca85a7a9d26b91c8b619de2e1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "4e1239f6a3592ccad7e0dd288e16ca056f4d7e8a9354c1f78e872747ac36ce90"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "feac228273aabd49b22ea6ff1bc533271685e288887e9329f7e785611f6650f4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "6f73351a0d1b09141b24991444ee787f09a5e8488f99e68f52845568e2823e04"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "c16024909fdeb7bc76435923d18104ca7ede2b0254827c3cb66a4f4716d2e169"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "2235c7c13545580ce3ebb509125b2da67255fe216c1931406d19bcbd8e7b554c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "6e4407c49c76b48902c20acdbc802cfa4df03fbf27bdfe2dcc34141bf4f9cfa0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "0b70d7207369c0aaaa8ac30219f0e09c0f6a24d9a6ec9278ae7e474f47714351"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "71236a951f562941b01a40bc9a81a3b9f164cc4feb32cc072a676b6ea21bba5c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "c19bb3a43e39e925a625a0c293c2d66032dc728335a02fe28278033b010b2a7c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "355adb464fa47a43054ede1d6bb2b15cf85c5d89fc028cca1620e0bdefa0b365"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "378974ef6d308d3a3c63784089da439c8106a7d40fcc7e87422c6a113d65c4f2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "a02a6334b4ac5d126e6724148b1ea91d7eedf425d0167834c394f6d2c89fec90"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "931b0fc6b6f399b3728173108b0e60e556b8e041978bd45c550bbaeb3210e089"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "b15f6a82f55a81bc93890d3977d917e779c858e7d325dc5d6af5aa22a8455912"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "8bc2039776d5b0d97000b29c5720e6f86eac950a06ec188735d955efad77ac28"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "8bd651b242ba4bcfd6a26c36d0fe5facb165fdf228ca3f830c6741b042225df7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "2fabf98c3c5431087ef763b3db0ca5604e96af77b67dc7ac126cf311cd3d71ab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "0001220f7a9c2caf39dded089c4a2a8fbb3b9f25633d43516058c6715006c0e4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "06d73da3fea4799be392aab6aa13c28a0d267d97de9a1033409f3151584837ed"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "721a4441e69fef86680d38904e3c6270f222f276159453f1b955a046b4657877"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "d1ddc4b768a5fc746e7bf6ebd5721be2bf965144aa1b9a3796bb8df9b8933f4a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "30b041376053a757532c7115063b2a7e532eaa3d26e45cc6d9d25e9a823274b8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "c42b04baaeca0de5d3140bbcfa84b54e1927fc64cb35af423647670dbdb86c74"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "1b5d5aaed0f0e23641180c977e7c9ae1ea254e7f29388a079dce2e1fa15963cd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "9cd2d8a583314a1002127e8a1ba9e70e58789fc38c609b4a6eb1739a499780b5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "9c29dc5a4f4f3abc32dd7ef8a9de273057a7422ffb108d36fa4a2a9f83b8aea3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "0fe6e07b4f7f3f91f8f54363c2ec7c5b91dd5d024b2b120704b8b3f8a805cc15"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "8ffca69a1f3bfa5ded5dfc57c434c617dcc99ef45add15d99977cdd32063fcc1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "c4fa79c5472d0d1b070b1c77c923cfc48a6f93d49d4e7b256421cb948dda6dbc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "4c83fec9da2ea54097944cf94aa1fc049efbdd01550e025094916a8ab5ffcf88"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "e8a411880174a40e7705077ba56d6950c5b6d322dc2d15384baa9d656184b551"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "ea28842bf459d978f4c1910f3d45d17445957fb5b49170299f70231d86acf493"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "abc7c8105aa5dc6270bd937a00d564f8d4264dce793eae7a105fd1ca701190ad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "b5e08b05256ae47532d4c883be82b39c28b433fa21d2f4db05d0495e496bef48"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "53e586d2184b18b910b557267d59169b7937458e83362e0e11442837068bb447"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "4fe78c2d3ac5370d0999036ab1f17c89344bb9f2cb5d7ad57891e701c490dc2a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "7d507350f7d53a155fae774a18e2efcb7163b638c8fb5a23271b81800b8505c5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "f951a0bdbc72bb065bd540ae34c295640d2b65547db6e57a792b7a1e8ebcbbce"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "45bcc45dc614eb62f096e6e515e973a1ce8d6d7df32208da19992721687bc9a3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "a159a3bed2db074a0e84b37e277f6d2b5fcebf307d9622422ca2e9b23d066e10"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "728ddb4cac276a842db5325dec2cfd41c13d5c944d87fc6f8d990aea66ce9023"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "85acd0d81083a10418728568b7706b6ae0551fe65fecc63a7c2fb11497b586db"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "f37eaacc28c3e03bc506d4d9ba804b41be3b7da765799a2d9ec2ae55060d57f3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "24bfcc2d087e8345b48d70ca6d1aeb1d7b93d3ce4eaa99b6abe5f447c6f1e60b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "068e1e8bc9d0cf1d79cbb03e475c03dad321f719ab59193aec28fe1d84da7afc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "b0b86ad8b3a21ca154461ee05db7bafba403b347db1e296922e7d19a5cbb65b4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "dd7dd8f8c4bddf0a29e837dcb352353ed35c0cc778afcde3cf4ef931c94b90c2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "3502002cdba0dd2a24a9fbead8beec5a69ca46dc8cdc74da6ac5d43696ac0276"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "40d6b83ed6f71fb37b06660bf259d3856b1cc68ccddb8926ac69b0ef5fba0c8b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "fa694e6459461bc4378e00738b2f0ddeaa4353f0370eb8516b79f5d2169706fc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "b62e29030751d85b8557f27836fe68e48d6435f1f3588cbc8612876b78bc464c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "a36a2d53f36446f26356f3a4c560c11111cb791a2ab971f1841b7cb7df7b7a1b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "cdc48fb69a50b2ba3c17bce34be0c2e50b6d89974abd575643546329504463d0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "24bd3fb3b014305b690d299649797316f0fc7cb90f8811ae52d98ca5df001cb4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "5a88624cbedb0892282f89e7a7a1cd9f41e627a62963dc41b7e9f8a500586b9d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "ea22978486f6160bbc65736866d1b25691f628505861e2cd3d69d09d6a0822bf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "c5849a4e2acc9b95ed6786f89beeee91431bcc42682978a4e173bcadbbccd2d8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "869435f118a76a38d0c57ce2641455c00617e0017bf4601f3fa0b2a63e3a48dc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "47f10ec335c2f73e7e698795d6f380a6ee2c1d6bf0a3257ec8ef05b90e7d679f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "5cf060af6acd978accb1c4037209ee1fdd68cead463b35b79e28c827295e4259"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "8f07e28f5e12fbdf85e31a34e75cf822c42f33b7e04a6c326bdd598b69a59f67"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "ebabab2f80b44c7fa72481878e0d206f8aa66043a8ff51ea6cffaacc921e9517"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "0fdd75cd917b2360188dbc36adabaf91de599aad4375c9c8ee156cbffa1fddba"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "e3311cbf862b0415522445e9b63e72eebd117ab7ae93925ecfc92ba4e886ddba"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "671aebe7415fb62efef82c7a5f7fca613633e73afa06197d04fbea670679e160"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "6877db9e120785b998b281a227a8cca249e693c91c0c8465833c3bb3e9002d66"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "4ae07859fb95d8f9261f54f80291816639bb4221aa8f9d356ba0be7a7e3cca39"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "f3fc7fb671fd9e157360197a0bcc56db8a9ad0a86d85aae0ac0331d111c71940"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "86ed5f87eb6f4d3d93bc5f948f45b872e2e8866eea69987b7436159fc6b8c718"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "7cb422f2bdd315e9921ae9f21d61808e4a0e7a2917ce7473d9b3ab4eb2f0097d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "2f7d9a1b7e890fa04fc7c70b77ebd4edf87f222cf4c35781835bf59ea174f665"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "e19623d3ab7ee5e246416a0d3d5f4d494230df71eb8fb81507be051a21d21e24"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "c32cae5814ee8d0c6c0c3a68217b598015e36fddf511377e1f26066e729044b5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "5c20dc5a923cd70264d1bd890eb9c29bc525327b3894bddce6102796d06d7877"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "9f3bd55ccff038fac8803b02c10a236e0eb716337fa51b70961c7bcac2a90fad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "59ebb0e008228cfa66a3782cae746c7065f3b61c294b51cec366dea73ff3aca1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "d8b356026bf4664c2b825485ef1b358d0576597d5d4ee854ed63645e896cd37a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "f9cd0837f9c51923796ec5d3fd25e53c09a1ee26cc92c4ecceb6ae8da4e9dcf4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "4ae28e9b8e758d0d17aaf3aab4313c181e60d02d0a3e6e41745c3609086173aa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "747a5a7cfe6d2eaf57280021548881e7d6d7830cf8057f48959073cced20978e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "ab2462711cb4a07b1a402eb3975daff6ef12f7a7717261dfd22da863e84bc756"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "680fd89072761fb5c3e41c07560bdb5508f493f9f619daab74ff81c4367cd0d9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "679e3add2663089cc7cb8a509b279be429b5c864d3ac2d6ce25184808d71e0ec"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "8ad1d9a733c3e2cda64b42536f07eca64e2747f4332a9f31b1cf09a04ca7566f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "72dc957c52bbf5ae138f2a95cbda75abb769f0df30a1e3f23e236acefa6fc793"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "18d9e1fcdaf5baec033da5d0879d7ec2ea8d0b73ef569742304af3d0b3579597"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "5bbe6c31e3ce137c28137b3c33fb1b2b684b032012ed9a5499b150936cb7a5b5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "f67de1dcfe54c4551099f41bfa476c0b5071f0fa74bbc06f0c125427588c8a27"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "e825273ba41f17b00ba38c488072c9072774165c59e4de4879ec1c4740bce7f7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "d2b48be0d85bd06e117b3155a453fa4cde3a56671075473a274d780c8d56d544"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "0be88bfe995e541d67cc7ed254e4e54468be05028c2742b10b19ab59616fd152"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "f600e812c7f60606d85292b9fbd13b69da2b6cdf91dc4f4242dbd9f1fab999e3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "05f217127ccfd8947435945d41fc6058c9ba50c024cce09701a7b20891494964"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "9ff720db1f0062b840757146616d97ccd94ca041e5933b01a50133918355cd9e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "9b05a996ad5e5dc58a30dfa6eb4c0834b93c9a5b666864e997ea30db1765477c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "7e1b79a031a8265da8aa7f4bb11c510ebd39784e3e57586944f7a9a794452615"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "d1ede8f47491bbf9bff8827b1b108d02ea0b9c4421b8a83be79f79e27d28433c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "694b0c5546cd1c1bbd1df86db0ee75d263235f223c82edfe5077b916d6198c24"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "8e13f6189edb7be103c466266c59ffa1f350b017383d79c3709ee92b9d2868e4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "763f46bd8f9a183e540d373d0b70e003d26962cf15c1fe5a10f399391f4011f6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "04a5087c66d2e6d4852d288673fd0542d7363cb593e3f282a3486344c8e5a6ee"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "f93f0e082af816b4bf5475d987b7d0f152521fc380ae6bc8d0230b94cff01b8d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "087614b309715a617b1321a14cee96a5a3aff02e09165f8f7e4d9cad72c5e8a4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "19a029ae19d5f68dd47cfcf03cdd7c9da65ea4cc1ae76881cc230595ecd4c481"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "d9ddbafed005a0255a5d4abb25e50fb002b54041904d2dd1e52b5bdb1d80e385"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "f89341ec684b59a77e168ac1bff5052243295d36ce9e2acc286799644f8c6277"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "a80438f4d9f4c4468ca49956d595ee91565e485eeb0df6efe17cd7579acbe5b3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "a7c44ea7bf23ea7efe079461c200be7e6390ae1c6faeb1317ed5c1b6b9999cf0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "c2f61aa1e5668813461ef294d530cc83d267b85d147e05abe803a53df41dcbf1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "4eb5cc1f454c65ad73ec441329caab937863991f3f866c8538c9a5c1f17cd69f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "6de8a63bf26898d25b560758854401f447973122bcfe9e18934ba6d44f866b17"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "abf448bbd4e99e388053326ee9b04af50494c4b79a20fce544ae804722914258"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "c6bcacc6e0a4e5cc5db0c134b1b5a29a2d248baf295257c449a0f65618c3f188"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "4a9514d1505d34322ba080363c6b2b251d4b04b975ad191c8c517b7c32ee4a56"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "db23190bb13750acbd9e5b69fe8f4410fd83aaff3def6620a80243735da00a79"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "e5a1bcdc5ce976c4cbb43b010a76feee87a09eb1c89e534bcfab25a03ec0609e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "38dcd48cd7de8cb341f45bba05d90c9946fef3b889f80386d911050dd013c426"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "faad225f376b0b8eed1df6fb3e4e82b76b10e39ab44b47d90d3d3cd8f5c9ee19"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "1f48b7886e0add518a17a8691505c2ddf0d0d366b9aad8d5342967d34d9f6e8a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "62d7fe6b67344598dfbf43fed9f920afa675c3acf37a5021cd73a7a6c90f43c9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "cde95b35a4f996c8cdbf713b2f4856a376e7f6ad7abafe12e7aa557e4fa1e9c4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "fd0bb31ba5c44a2b95d2d2d1eef2cfe526240cc668babc49930e3c7dcd5fceeb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "96886d7de0a423a69279b63ff11b6773b76c3cb6367417990d9be32bbf20b95e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "44a6fadbdf8139026ccaab93a0685421f199512177037042dbc8894cc00095b1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "08989bf6ecb3a605648640210835fcf12038dc3ef77434dd75f34fe4645d3583"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "99bff4d7bb21e460ec4159000f90e7505162c6a98874b185dda0bbc79ef51c2c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "638e950b04b4bd812cd7c34fc88d80f2ac0037ed71fbfc12d097114198bbbc4e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "a264c3209a61789f162000e2f18fb20855c812efbba16b7687b675ff11d20891"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "57ff6900cca93ad46c1dcc3cc41df68c3d8909295d90dcc0a63b86060ff63839"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "7e2c22c91294aa5d04dab4565acaf86356a59a91de57568e945cbc86e2cd6ac8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "7263a1b5ea9b82b9856621b3af85a51f61547f5d697b42e407df108cb44f4cbe"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "b4613cab45673a827e6d50ddef3f4104cd3f768def0c8c9a8c126e7295700e29"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "efd88311bd6a64443f81202183c402cd19e9b2e7fc85fc8809f1e7c63147d2d0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "2111ad084811afcb72dbb7950315d11c1e1cda7ca83a9a88e43d7f792cbbb832"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "c3ff4923e7e65dbd5fa6868c50cb798f58797284c7024985aabbb4a5f21c8a2d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "4ff1c814a7d7934b0c9afda9ffe5e29ff62fd5c35442ad5c6e203f75ad979e28"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "fc66f1149ada66be36d34c3e381c3890aad4d0f46db82dcb70b90c16cea15ae8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "e0ec4c9541b6a613f00642b8a420bf663e4efd4c12f6395233a799b4218313cb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "7fcccd25a354e01eeec789cf7ce268277b1bd8490dae68bf09ce15e23ceaa197"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "cbeba2f2e5d25642ad1df4bbb62a747ce34063d36c6d7bd05d2f29d971c30e29"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "b5f0886b7b9890597749dd5dcb58c546203ac87b69dbfdcf5b4a7e522524056d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "88146459ce30fa9534b733be9ede11290c2324e3adfa7acc7625626b4376e421"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "119e05e8518b14f0e878de36a9461265d07c83886fb82c6f27225b60c0439a12"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "23fe9f1af25979888c9b5c98dbac7008ea0fbf40dd4055fb951b776b3ce73bd9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "24b5e33bf9fda970fb7be102c5200d1b793704ca1ba1738be912a981ab4e79ce"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "815e2c34f4f546483cd7e39d2d92fd1df22474c650a57da016cdc8a64165114f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "87e9e40a4418abd1678592355ca9748b7babaa9cca5eea52e9dc3461846eaabf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "5f0d390192c713791c5043eeabf66747556c7a2e687e3e93a0cbd36b7140721f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "14eb721aca0081aecfe6baa37363111f905833d7ec695ac51a704cb39a68de05"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "1b729095cc4fba461a4cd32edb70a591dfdf0db4aba46385fe2765afd1502d5c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "253094d13c51e37c22430d5932a37ae2dba7564e666e44cd7af7c4201b861e27"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "6ce0cb7adb18c81e2c177ffa2c856b0da11676fdc480cea2f3f49b647e21af2d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "2262c5bef6fb6e5155e78afbebdc171d36c22257c37b5337eda0e298f1510839"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "1fc28edef42aa81a8c214396e642a2e48cb6fc01263559e56509f4a67a473388"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "ac178a35e3df4757e440a996cdc99f79ddc006eddf7b85623581915f8372f129"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "45dd75aeff95ba20233b41f1da56c5a45b978b0359f8bba4cef02ec6f008322b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "fcba3dd93f98d7bf7b0b197541bd52de63c5656a00922953abb7f356e298657e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "332e04d4ab569b2c92d6dfd240d04dfde5a0df4064c70a16f7dc106f31d597d1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "cedb0b92fbd13012345555e571f780306d1574dbb3e4938b7c93aaf7b9c74722"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "2b5efe316d68d950da7321057ff0883498eb7af0ce9cfdb5513f91b047b05371"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "4e2523d7ccab806ab7dc719402e1996e75729ea0fbb854df09e9c5e84e153ade"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "ec536121de14d00ccd4ffb1422015203096ac6f3c982de2581a12722368f30f3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "0f996cfd0f282d4b625b6176c5e5b66fd9a52e47f2d1bc3b63a3c4cfe3937d77"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "c569317d4acff6a43e2928b289bc11186df08f4ed80269fddf0cd312219453b0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "2b390f6c4bb6fae09ab739b9761d4f8bbf42dd4bf23fc163fb78148192a17bf9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "da276f4f07baf58f5a98ae4a378bb8f1adc89237ffa39b4983b9093a9f56fce7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "e966d96739afdffdbcc0d4a8a01b08b37e0616ebf944821e20537c8e39ff5c6d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "2a68c651fe436c86124c024e257780fd51233854470efbb87e46c2947437e391"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "77af4889a2dd04ce55df5accc92c07179cb0cc404dd17bf14f5ce1d405f2506f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "bc2580da0f7e1b71cb891310ecc02b8fcd2f79d10b629ac4aeb8d68b45219f80"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "1277fd40484294287e1735c01ae95ef14380fb35e65785548e29a7667b5a7ab3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "0aac4c37cbab4921d202567fdc5c23131c6a6b180a94b4326d77ee1090930ef5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "2b07821906178734e80d8eaf33e5e80a1ccca99d2b0325c3505957fb36f170b3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "4e5693f510b5f0e6053f708882690c3221934134cb2512b3ac9e684515a06fb1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "45f793137a493e11cc6f9f888e103639afd6bc54a90df08fc13079c890341603"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "6ddc79e3972acd0101973d041163cf0473ebb3fb74186bf8e28d3d0f4ce699f6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "2cdc2cbdb16f2ab67be89cb82e89f2b7c931b3be2670a1a9f8da63db5cf5f33d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "38ad92d4a21337d5ba0712c36cad74da95883c8563d6a9acd6c985559a895b68"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "5104070ae8edf53d370ce5f011e5c64ff59f52953fef18bb7df925783c373b83"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "baaad7b1ba54c2bacff44f7bdd8e79cef5a48d4ecf60337a4cebacc356f1ab81"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "2ef1fd7a8be3633d44d2710484703dde6c6e61dc5707a1b453b08015160d246e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "79c312a9dc3c9811584c6c2cf2460a15c9d224e09e03e3e5910bde018d87e458"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "4c8b0b22e3afeb622834cb8863451d000c3ba9c447dddb5b5a2060dff6217d60"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "ce98765360ca1b643ff52b118de41f1695d13ce34b7d4e81aef56c57be12d146"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "05bc129eb9bdc493480dc67f53c817172b3b0db5ea0f1e6196af11e0bde0825f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "b4bc92ed6177ef2da0c161b83c2c826a3d278a197b3f6d43139ce1af95376e64"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "faab5b106a3342015b92f9d188679e89f35c23ba8d6c7d8a0fba133c9131a493"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "c0cdfdb6e1a19bd19e6a2e5e865c8e9585f7148151cfe41eb40a0a63b4c77bad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "8474e3440de7d24ec6a75f333495f2af7bccab92cc46d7b4471d57e7f8cab652"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "bc3ef6caf7a830d40bb4556650bacac585b6319c2869fe060b0bc58fde7c6acb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "02a52c5e20ef90531b5990651c131e4ba2816637c09d084d6a3588d518c3eb26"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "4b2e5679ad0501513c6a9e88ad02078fc6c90fc342b8d316c0106263ca884f2a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "018861fb27a8f30970764c8248ac3c5c2688c370ff0888074ed9b7cbb04f052c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "44c83170bff4631cc2d0bad12601355fe15a564f68402694bd4dee60f9f5ae61"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "7050531d372dcf7a1ba8f125bfd845865162f845ee56467dca913e4abb2a4e2f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "7748be25104793ffc7a2121d243dcc0d5631550cff470e3be6345f954cb73a10"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "c3ffca9bd47ea21c357a895915755f83ce9ca0f89474c8082fa79a0f87478f37"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "0893f1d632f6187272f05ec16b1274f2d9e99dd0098dd9defcd089b8780bc3a5"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "81d4cb62b6d587529d57c1e7e513235fff86cec14f0aff9a90b75711b5df0a37"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "d7c69763e0df81daf083a07375c658598ccea19032c4441d4784b430aabf24df"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "be307c54b5b898b935c2fd757d36d28305b4be86ccecebea12d739d9cfe223a0"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "de54608b7d51df9554d66a0c553755aa0ab390e3289cc41b4780d09a44349787"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "db52b1143398ad144f259d8baa20a023ff398e5eb714160fc7493cd3ad3e14e8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "ab371415eb8346fc5047f128dbc7519fc17bc39dbb5a3aba253d5662fe00428e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "4c385182bfa38f0976e28c6aa41fab6797aaceae11a86992382a23b03820506f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "ae556b548056d8cda0644dae9be7e231e42c2d5b0c58b5312205e3398fe5bcb7"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "38c752e8ea8874b36de1e2aa92299060c678ac16741ea1dd1b1b8d91c10244b6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "f1782954e684897534ec4e2485b94583311018ee37b446c025ba1d2f2f3ebc6e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "282dbae110e625a558978a95a1c3d7a9e969e0c785c2c70aaf27801449c133b4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "fcd1b151dc2dd93aefd4854d1a32256e56c082b59a03d845a22e4feda59c6876"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "2de2dee35642c7f7d412221fcc4af20aaba75f51efce0bc36097070a925c3eaf"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "d0b01cc8154c70b3aa1047075fb32b0628715b7eb0410fb440075f090272f94b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "7b68121705a53174b20c082583ebf7df4cef1f8ea5cf3f1ad9a18204c0114d32"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "d824ea3917b719880786630a21b1a00f8ff677e01515bc129bceb122f33d5302"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "33fb90e6a331316da72f824111095f56f4d8ac14eae9a2d1d36488f688ff7ad3"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "363bf6a10821837125dc2ad900993f2478176299f205338797bd50f52d435cf8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "db4fd64cd581a296b641f89e6dfd50af87b26b65af764fe5a67b0c0a9fce91ed"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "2452698b540fcbb5b9507dce67d3e026b3a90a2ae5add12d3720cc3216f4e304"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "8574172363d7f0e1eb0d869cbb87d1c304b26d1f6f9955b494c3a665792f3aa2"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "67db2d662e05c4d94c9b0f178d11b5cdab135e5b1b3023bef48a749e7e9af44f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "fafa02a8708d4b1e5e9f351eac707d5af071738bd823b15664e9698f03e23ffa"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "4ae0f33a68272a9da30123e873e7af45cb722502bd2de8e4f87adba8bea4e072"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "59862e369ce2776a82c1f5314ee04b11a94e196d5ed9ce5d28df03eb1fa1d774"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "c2243608a8fe6777098f7c1fa19521ea2d73eaba72049dd04c18483a52cf8d72"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "a1d7dfce7893be0119dd9bd64b8be2608d5dcfba0131bae2b404dd9b942e76f5"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "7eb8cf778787a6537fbc6d4c0473127f77bce4272a782e8ce6f7aaa8da504ff2"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "74759b7da275065ff4564b39b02f2679183679387b5827d673773d6f6be777dd"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "aa1a3462d71f7af6997a9afc6495ff0eac4b521d3b9c60af1d8182cecabbd720"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "c04f7fddf1e0c7cf6108ef64a87ccecf9ef102e8d9cc13a406c6261d02255a4e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "7dccc6d0834b88317fa784e3c57d872051f090ce17bb295b3412d12ed6a2b96d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "65794422947c9bfb703f21beb01048d1c73e8b1bc2385a5ec494007449838feb"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "bc0fa3dbdfe1ce32e699c529da0dd0a9cdbae28ba4657b43b6450fed09674d1b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "96a11eb1b949bea4a10398965e6790c93bb953ccf12499cf2ce9a31d9b441008"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "1433352460eb3d08b7321699332808c62ee927e9d932bca545724a7b78d91174"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "9554066279f246dc7f8cdbbf224087451e1654aaecf044b0dce90ff22491761f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "727af543f0400370efb02c7c53622d829c73f752d2641955e245685149bc2285"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "72a32c837ec3364e21a0ec3a233997cb4f0485e3c26c6041c9266860a53768cd"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "39891c61448cbd0d891ddeee2caf2b8384e389f2724960b0834d5e5357642459"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "a60016a3f85069dceb1c99ee15db46b583e1ddafcbf65b208c886090f2ef1ee7"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "5e7681d5554f4d47e468d64ae35f6fa16f14842680845cbf6458a9ce645dd8f3"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "18aa00b6486b17d1ecde112518eadcdfac5aad801850d6c60bb1797bb97e50f9"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "25906f19d637d81d2f2a832ec4a768f5d3d33a5d0138d88b7fc40ee4609137d2"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "9551ac920dd8fbf33e89518c454129dd1666cf47eac1c7b22bbd4c2bc1fe9dd3"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "5bb0b803cbe53b3e418314c6a27b253deb1883dce98d70360eebb5886aa75911"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "b98b182227e4592d0a6746341362191bab2ced05533459545082c93097bd586f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "8fca16b4e5a72acc6f16e6d3b0b834f57eafd8020286c2cd07c2c6a86e129614"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "cb589982925e22ebced95b6c6a8f3532efe454952a3ff0aee01326e1643f32fb"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "76c3c78eb990bfd29ca9e9a885bef57062984d89bcf347b17a99f539b3931e4f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "5649438cd6be6e43c0c844e5b260c6ff0a20adf85ce207746220fc6160a8c068"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "004bdc91acc0eece06405c7a1ae71651e17d7f4e778d48f8f36d5e2fd9a84891"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "0e9d96048905f8d17cdbd01de0ddbcaeef4b4a35bacc3dcc160e82adc586c7d4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "a7bc73b7b664877ea7240775b29accca964877ca8f36a2927142b7c0f29149a0"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "c83f6db40fd2944675b70b9620bdbd0f3b72e91088d53a6c694447d716da3187"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "35bebb23c5f94b39e14ce3e718c7447863f37e09145f9a1934f78bde018643b7"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "d16b97a8597df9dae84b71364f6f32abee93e853aef1286b753e6b6103602995"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "a7812c1f378d9b0bb44f5c86ac9a9d1180023344405fd6d31880057ea7e16656"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "55b1065f577819d5800f33cfe344da69a58ca5a9cbc83e3c8c74943b7664ddd6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "54d7daf334930ccc060311f9dff8a8004b2dcc40ccead5184cc078f447d0c621"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "899afea7e11069415626b557103d783e41ba77f26860a487886d01f9f9311793"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "3e7c779fcef39eedb27e6e9a11812420bb4b8070c8d6cb7c0e9c5f8b6c4cd4c2"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "5d2b1be0d80554a52119c32b5f8a53a9b00ed00a5881f410140072a2d6abc884"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "b5f6c1fbf3aeed381be518c31e00ddce0af041f9636dd0159a43035c3a3065c6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "2fdf84f14c998068059d461f156eefa57fbcd615143f98ca3ddaf21b762eb486"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "0d38ba84bbadb2df778d43238c66b17607692092d8da159e7b151f4a15f77a66"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "d5440b17dd6f60a2309efbc06af9daf1e768953d800d471bec35e0ad226a4a04"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "b4306cc8166397552106afe437e276f4d67df5014d5ef9ad8dbaff6eb5b121fd"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "4e3739e8305717684c265e596e74718044aa1bfe2db306303925939cc0166477"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "e68623380b31326e8cfb5876d6cc0397ba2e152fb900afceb5f9035173208991"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "908d142b3b47d34e427883e646c76854e53cf846834cc0ad41a687129e40d334"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "9ac4bc6e8bda99f4e42cb70f0cce9acdfda356f719b3c3c1a63aa7b48bdf1b4d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "4b5b9dad5128bcce35bccf7b5f6479e474438ce08d53e02170f9a1e457953cba"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "e102b6c63f78af81a44b35c5bc533bb6deed40cc0426b36bd0db66fc23b76788"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "5574241a92bc11d4b183a186aa1fa75146d3981a13b2749a133ab39aae7fdc93"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "d7b9563e6cfa3edd1f469008293f71a70e34c1772da1cac5e35e591bb2f6c284"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "23b39503514eedf26885ebf03445759d544d899af0d54ba0930390ee96537284"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "f5e10b1f9b275c1a0f0e5c8dda911bfa4358549e5eb3f691d10ba95afb91d8a2"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "6ab5ba0f6ebffaa50825c626d05c567009ba79ad92771ef973fb90422d4e8573"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "fa2782bbe2657e3cdfa8ce21121161c78817d5afab16b09a32f5470dc6a08908"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "4709e7cceb4b5d5715587348c613aa9929cbee122b71b694efba03d0c3ba23a0"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "a45814b3b0418bbe897eaf76fb705b43b12b1be35954d6727dbdc6541321f8ee"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "3e2a1cd01cf89168fbc4e9d2504ccb4d11135f4670a8ec2b8b1723c8c59cf9a7"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "14a801a5567fd5d8f3f77a5857b1bd4ce9cbf35a445e3ba5d5da4ef87485cd49"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "85d9a08a789eb525683398e06cec02c33fb53172c00a304c4cd50a9f459bf961"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "3c2869d0fdf5d146af63f4de9a28f89ff775ebaf5c2ec1d0a1f683755020780e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "7b3f0214b76809cf06703a09cfa13b90e55e6725da505c35b2985b9521b77f7b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228328, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "5927d8f9fe9a52f3ee41bddb85b497e47dbf8a88ec268f3c67b64b8d2f6b175e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228344, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "746e4ff69548dab107a8bfd3a356ac65e24d14e4830e4df51744291be519fcfa"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "eab2b96c684e18902a837361aa501dbfe15d0a5c88cca519a8b0cd8328ccafb9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 164976, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "c7766c186ef39c68379507ca99a5128dceaef121ced23f3bc209f4e7fbdcea6a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 197824, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "b292c42bba79779c2ee56d79a4ab0489b8ecebd09dfd6a32969ae106b9f0754a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 164944, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "354c527f456e86cdeff60f246cf63a22926395f1b195fa036b1674630e51132b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 197840, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "cb9a5bd721e597420ad4c0b594bce21b6d96223d5800b32389629cd68f3dda37"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "8eb46b2f68ae2f21642d7300817ca254e342ad7bec7d3b04e43bf61594005839"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228328, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "29504874728d38ae9be9287918c00654de4f17d645b32b258623d9175ef6ff56"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228344, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "7b24584ecba77cf8da5eeb3b3ba3e38664fb8aa02e2b41728934f243c8b3e4cb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "b63456b1537a6abcb9a7053a64b9cc91d656b3fcd613e33abd09cba26b617f77"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 164976, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "0d8c1ac75d611592fa4a865f7730b11bd4e068fe38cae67f6df67b698402f746"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 197824, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "4362eb84dd9071dec4f87d05fb8eea47eba3cebc60cec85b7fe3ec7d5b526c30"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 164944, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "a8b8c2582684301741f8f7663b92d7cc0dc3b914c3e43211b4659a9a0ce2877e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 197840, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "c3772899392f7a9ff6d708fcf7198f343b4c4d41cbfcb920976a45120270f6e0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "dd505285b7672b8380bbab973bbeefd2750e2ad1257c98e1f8d551960d625d72"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 183400, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "c731f0b8588a05ad5fb49ecb7677321fa2ea69b8f4e79ecd264061564d31b67d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174696, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "f33a9f46e29aca3b8d13c7a63f7b6b481536d36c007728ae5481ee94d35de971"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 180408, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "0fc9becc707e210a5f5f74491f58b1a21d3ade3d186340068d4c788a8f715617"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 173624, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "c1150fb66a4961e0e11f2321d5c3588f5ea18a5b9512a4b129ecee3e61299b11"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "b65e0412b56bd7ae7611841dca9514edcbcea00944fd16acdd5e8c49b642d18b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "d4b94782d3c5f18f421af655aeed168ed990d54b735c6ee66c4386dcae148306"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "e3ed636509463a19398753a74bf7013135bb22e0a6edefb8c1254362d56e48be"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "3c7a86c312d36c0ae85ceec6dad48343c76001fb44f6c8c1478938cb88cf3012"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "b24c153643ba57a6204ca848899d35a37881ead035a1af630c33a83c92092538"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "2b024267e48d56b158dfc4894dc0f4785c1dff0444f2f820b065bab6f4b8e74f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "549a6d43caed7e0f08ad229254a29492eb38b4e01c3ca8774db89790116726e2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "4ae92d528735a5577cdc95427c2724684eebb2b339977f7905e719878a3cc0b8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 150800, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "47e7797714396c29fd17b8a25a22719f0c432ed00b56e389f05a94b83b427510"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "3e7e202f62b29101d07789c34b35ab0c3389758456acf295999f36d1992ae887"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 141968, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "dc7540d023fda5429fefc44434f2d96eecc4dbc38176137a3aa73bbf3171cc6b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "fb8efa51eaa0dd0eace8db646f740a63193da5494a2bd7c692e1b3d165740d46"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 183400, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "1f42c3677aee88a48dc571166cd61f75e4f6a70240f116fe0c3c718b5226e051"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174696, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "501dc6a46da5298987dfd755d80bec25138cb8fb4f64507cda7255e56726f4d6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 180408, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "9bd34c5feec4fa053b3c6cad6e37b622b812dfa45bb24d72e8c95d08c166f477"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 173624, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "35694c8b10c21200f0423b860f35675fcf98deedf549aedd6c8ee3a5d187e5be"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "eb80b28efd69bdef61b949e53a3c117704079cbdea23dacfbf47ea84521b73e5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "4bf613c3b859ac6ac67571401612444a8313b5962ed32d8d1dc819e394f3dec6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "4a55155d1307d11873c664dc7fa916d24fef93900805065b30814b6720331cd9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "04c5cad2843253d315478f859db849556152591d90a5308c72c36f08da6d3645"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "3fb3590c86c6137ee37b8c280cb05be1cf6776d9f4147972832a4a91bcb5f6ec"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "bebb4b182bbd3ee3c135ec35981e3cddcf8cea462388b834db438e5647586ebd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "9088344d4e667726d1fa435c40a8641e281b8b6e63fc98b644252348ad9c9644"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "3b3528a0297d3ea22cd3d79b016b06635586d0873428a4cfabcb0a0b672229c0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 150800, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "ab1d2956f792715f382b7733bec7b0bed9b528ce1f186c0f275b4b6a56ed3cb0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "8fbeda1fb2f4e83f2d36cd0f16c16aff9a0fd62788dcc68b34ede20bd88de959"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 141968, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "64ec8530aa26723b85d8aa8b52eb080526a99f28810ae728d38f1dc29807e1f3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "90df058f56758cc6889c3e359395624a60ac43aebe833e6b7432f732d3334027"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 183400, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "398eb30e85479e8aabcd37a566762c2950f75c791798bb07a596c51c789bb6d6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174696, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "91675072c5b3dfebc08ebcd923770a7a40741304a3df937a5a2820cbc9efef43"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 180408, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "3a863a61e6787aaf5b993fa7bcc3d73d40425864e7a91803323805b85e58a7e5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 173624, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "4ab6a3f73ed65ed3e3cecd06137f08190fe8fad92dd6ba3a8fedf065fcdc762c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "b74c0f112f5daa1a6ea1da28da69164198dfcb24d8ce81dddc981a94dafc736c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "c3f11951aa572353b5016d0d014572e91672a238347397f92d4c28231b8fc486"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "33ac34b670a18efe3095c6e2dfcfbb3f9e8dc7a5834ad3842362e09a67a45172"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "617ff18227c8e81aba886ec48ec6c6da17ce76d681db822d94e7852653da85a4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "39db8281b17b4635d41ce4496b3b26b421fd2061dadd9c479cee646caf51d924"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "e4c15f35cb3afec8244da33bdeb981083023a14a2b7a2508f712294f0f626529"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "f630a0b63463137ac8f2c779c383f0f1ba380dd8ff11f52f2be940bb3f8f69a1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "174b931e5702da8b2723358b8030a13c3dff704c07da16127f656f0b665ad13e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 150800, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "6a1b77e9d6119e61a1507b2a3add8c835e894ad3c6c1d855e75650a348646e69"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "197ae682cfac10f4d9d876cf295806ee3fa975b264e2259f177efd6e5deeff4a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 141968, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "4d5834b39c830ada879e04e3bc300d5a69f36d5c27b395451992733e342d7697"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "b946b119bc12fc21df1134f50fa602ec2266565decb9a9111081a63c6118fb51"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 183400, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "721cee05f85cccb85ca5b432dd203ae3824756b35cce514915b14d0a0404ad11"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174696, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "0cfb777e8443ed6927c0fecbeb3ffcdd44db82451ab372be325e4fc5568ad28b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 180408, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "c11466f7d8f664207f346e21f78793de18a81b0ebebb7611e00e7efbd118f0f2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 173624, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "fb7424ea277b93ccd7314ae8b3f7328ded5ce99f345206a011b55ef4fd2b980a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "46e624a3f8184a728ed76fd7483249b2279ebcb9433f937b95c106c17ce35ff5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "dce5947d75f13066196752fb2220708ee40d1273b7ced2c0cc02e937491a08a7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "88a565d1970cbe2509d3f7a194b3e472411fd5502328227f26973d670dcb0c29"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "f2e42d23dbc472365366f0c1be0ec152c857ad4d50576b8a29b1c1443c87f593"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "08cc68b34c0228e7b35b10999d5c0c3b7e0a907189fa9a4c94408a3417d3a9c9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "d2f761760f1dacab9256ccb928b4b532028ad211847185f8c42fd8b4e837c6ab"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "0b3a91c803b00a8c754675a308edc9763e35d93cf00fd6de873752df56ed1005"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "d49fb138f762245409c4b35e561e7b26c7b4552d98200a2f2b6fe8438065d5f4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 150800, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "e3871920276304466c10b48385a69429e68e60fd184db08da35bcdf9fcd5f246"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "f10cfa2b22be6118c1019b763420664525b55088b127e334283c7d7751de82dd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 141968, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "98dad8c18b70184663ca2a1f596e7b5af2eeb35302f12f9a2aa59a27766bc687"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "8b9d66109704245d3b19f880159f8fe71e5dace10b7ef3552445aa72d6688c6d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191080, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "13329f47223ebfedb235dee7a829ebe488530d8d0c95a2b7aa6b1ed1b9571279"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 178280, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "dc61fc8f7b847faa34ef1a7960627551eddc97970869c4d4c6481a282f30c031"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "f5e9915caccf6cbd627dab7b1e76f8e278b4c294f1b74700c06576230b028a23"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "b2a6edfb542f8b3f8ab200e63fdbfcf1f8ccdda0c439bd7add5ba8ec8c03eb3c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "c7b6634ad45eaf2756d3463269b62bd496f20437d8e888e77d90526abe42780a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "f4366f211979045a2e51ffbbf6f9156ddc733338079b42bb9d0eb71ed9f8cc5c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "a934aa2d09004e36930ccc5e30f1628ea12f7bacaa2da5c2a1db25352d641907"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "637d9e8d5b75feb1c152d3912f11900051f6eb2fde861d939d906772e5ae91a1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191080, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "2bbed9b1f49a65d6d64589907187fa5f6a8e0cb3ef4a2552143a60881665e563"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 178280, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "388b7ac3a336920c13b91a63e7cd6191360f99e4dc85e1fff2f0192ea8470d0f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "fd4b1416140352ec34b59b9c1f4cd9d13f501b54a8556b17a2f4005aa1a1b4be"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "581c1c3853a67c9ec24c3f52a25b2106a4bb859af4cac6ecbc0161a5181773fe"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "72dca2d4af100dd54d8e60216f0dcc5698732dfe3a71e2e017d503637f416a32"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "e6bf801ec9081121b120c8ba6b6669a6405f7baebe1b46cbb74539fc77432eea"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "5f31a2328e95d0c409eceed88aae139a35ac2ae87d1fd18e0df1c6a85d023faa"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "af0bdd263bff35d2e744f6c90beccfa8e21cceef1729e23ffce758340c52f95c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191080, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "77a67aaf38fc078007bfa96fb21a75f32b827c50401badff92e3aa742a3ddc66"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 178280, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "51f8c8165205167fcdb17eedfb4c1526ce4fc4686a00358ea1d21892a294d473"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "64d583cf39b9563566bd00798c797b8276f373e454af60a8e4f0f3bef6dec775"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "c2e03ed7b1986a0b7a5c71d76b42307a8486759ee392251a0866651ec12e4ad6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "5d2a588e60e1a7bdb52d7357eb629118da7dde57e97a9cd362e8492ba384dc8d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "a9d7d59084b6ef2fee4c47747818c98787f09fc15e72127b3683a0f0891410ed"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "b6a56c68b5b03a2bbc98194f8e7fb00fbc57cb93ffaa8eac8f14893c9c6bf577"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "22c8436881df59a7c9448420f90ef463b3a7baa1fc59ad05e4eb032f3a9307a6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191080, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "5d1b3f82d5422f136cd626bf5091e4725a931efeb38b52a501fdbde0e11e9457"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 178280, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "b522022975f74568c5ce437c583a4a2084c4f39946d10ce91752176524ac2510"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "bf9e852a5d634607ebd7aaff866531403ca0873de2ccb13c926396ca1ed79a98"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "6b0fdd17a4b45d8f881466c911bb33b10b82e4d01d6d6b7cc2c09bab1b475c98"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "e0ab5bbf56a527596bd348d82e90daf487c5a3eb6c654fed92b49a1a60114658"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "e11f0636bc9165268db14bf648a6aaf5a1ed6cf9688edd336174253291750958"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "e08df1551a99c1ed250116384bf7f895bf9aa7da14d4b23bb85b88059d7aff97"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "26c3df392f55ee50064b371a40a7d00f9eb2af567d32f9df66656d746727e092"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 199864, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "c162d91c6338b07e9c42e2600c6b5d7a482fa79859ebeab013e4034269510ef0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 199880, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "11526e111d38eaeb33a0722c5261b3b41ca20f9700fe2f263f6774d9d5df5b42"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "1684a5607c7ad1ed38b59d6dcb6250bd0e58db7a2737dc1be93d5a90d39119b2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "8cec9aacb1806db54159ebdb0cd66c4381bc54cb3d1cbb973192b3bb89808a3b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "1091bc5e00db1eee99a8d57fed96314557e43a449bc0af5976be16bb77f4c437"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "232680cff148d568b72aae1166abaf68f15106062ab496f2a1a5181677d7376e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "7b52c779e5ec08ff5d81f158a92477c38224e0bd7406ceba22714fc081066f9a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "8b99c21dc3df3abba83184cd7e24ccf83a2be28829c744b007ab91cf2978182e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 199864, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "c3e9129b98558a9f9d3df709d8e687130bab007e2e4e7eb502de93264dbf209e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 199880, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "061aa411fefc44647a2a942c3ea4b9a4a09e861edfe76b16bdbca498a6714a8f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "7fadd92c9507eafbff5e4b4e31406c726e79c23f0134bd279d374ac516175c4b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "e839a5c9d90c6e65e16cf5221f03333a70f9ec1c4b8ac13d5d00314a530fa8dd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "04701497aa5bacd5800104f00839b54ec1f71841e97435d8077eed542f308a7e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "58c8517a2dbd34bd1b6c6036f3d036c44346edd724178b5777372afa84acd28e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "2d77a493930771126cf52ced908f1edd6c309eaaaba6f614f55f483448f1aef5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "8f776fcc4d003d731e0ca288504d7c1a860e54d2e9467a8d7ef0e61ce233b713"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196792, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "62d7779d45c83ccee9fa746f934830f50582454006e0b489acb35d171b2e1e67"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190136, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "21cff09ccbbdee4a7e59bebe194e6ca223460014abfb71148cfd7c3a6795455a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 195848, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "46cebd66a08b12c16ba27cacc4d51c62460b631832921228fac4b666cefc1274"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 190088, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "f48dbcbef8bb3dc3a27a112adadc90cec80e67ac3ef1aad0d59bffe1f379ce89"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "3be74774857f52d5198a093f2ca42f990151f4f1098c1565f3af6420ee221586"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "3abc6d3009a5167a0967d976b61518c626a0b09f672135aca8ce36944b62e8e9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "072221ac49b11ebc1c8e237cddcdb453d0ba0dc317f7211eccbc0408e51d1b93"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "3a151aca0a0d9e585c7f6db30db97c58c9e0869c463288277e681c94ce6f9a45"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "baccd29cbe67918f1ad342c0e71c6c67b01f08fcae50b82a0b735ea86b0707b7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "4b381a189ebfdffc6d881c39ae236ee570f0e065b7010e26788e203b3eb59b60"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "378b5d28dbf5acfce79447fc037fc9a02bc78f5d42cad9809a63d3adb1568ff3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "37217dea95e7721501297725ceaa676e9ee11741e61873505e6ae8beefa95606"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163168, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "29f0d4bf9cff6dfa7c72a4f9001dfd023bfa7bfa2669b1a0ee5f5e2e9f475b8f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "bf29da0d1f62ed033c89ccec7dd7904a9b10f79c51a886483702ddc7569657b6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156384, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "1066d1c1b9f5e1a2ebb27a9cfbdf9da8c035742ce510ce77d5c0ca53b2285deb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "120d2209b1e51eec9d9a15cb4cc1c5b3a43f1414a30eb36dae378cf904683652"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196792, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "691102ef81b7942ca656170043a9546c70ca3ebe7e0125ca18da43cb44b51c1b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190136, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "9f390d6f6784a8009deb428f71954194fffff9f3fa93a823e7b5d51aab4c1fc8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 195848, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "c57a191f1eb4fedb3524a66905910f808310bcd0df942aebdb7f4834ed2bb87d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 190088, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "9d429352d9c1c8695ad45103764228c0da68904c5d3e5b1134a0517764c5bb59"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "9c23ce9d09ba217f7a657d52a26498b93accad1bcea4ab7b645bd8ee5a4c1bc4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "8265e1c164b9064ab7ca121693c4c594f1fc1aa15a0026b55e590c8a3db65293"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "4a79b52460c84b43df816f18286f0aa27fff3ee7e6d33c5da7b4c34dcf00180b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "cb37bbaf39c78cc6580c5845459129e111a16aadb3d9abf1bb9a09af7bb92f62"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "82780956ac69537a1d5454f242f71adb32b6f56879c61b0d8eea3b7af4d95d6e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "96971ab537a1238404aab3e95ab9615c6f0880c47911e0052b5ace1c7f0fa843"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "d59b0b1317e807731a79c30df666a63974e73036c5c2251f7b451632cdfba7e4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "54f713ba6f2f8df655b65b140b32ce327cc571779c8b4b15737aed7a86c5087c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163168, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "ce855cf28d105d424dcc73f13f679147ae9875bda87c4f67cd7193a784ca89dd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "629f7409add1195a82e0634c5cde953ad8e0b72f01c63addc4692c03faf810b4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156384, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "2c1c1d17e9931aa7167ce4a24616a755056885e4fff36b6ee1007dfaf95ccb5b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "fdfb2e9956c9a1fa411b1d81a3d2126d248162fcc242ae6f442ec19cc13ef8d4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196792, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "4dce0cadfaea9c214e7a2765a1104ccd6872334abb5436d1dcc464274d78c8bc"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190136, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "b1e500af920b4d678681ca20fb9dd1a9ba4d704bff2a13a2eb9c3817af81fcc6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 195848, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "e1f00765d38645eba06dbb1c13312801425feccc5d42b40a428b2a0d2827f684"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 190088, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "7304abb3fb4dd8f2bbe49d2e6d43359dc61cd1fbd27f2145200d58f9a79dee77"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "06cdc517cec28b3a178e8fd9f57ce9d249907c463418e29bb050e49d576102ba"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "f7fc102ee9b1b0c33c67e75d24d494743ce4b4f4a51fa86718ae6e919b6ae557"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "39207b22bf132c0454c41d615f6c9f64df107ded237b46cfc4394d1767f719b6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "79b0e3a72707dbbfbff054eee6ff3c6c829e31f8fdfc4c3f035696a64e7808c6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "94795ca5d417dad033e4a8d632eddc28bbdd3e561d7f6d4dd3d49dc559cc0048"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "50172e4d0a79ede4352779e9ac54cbc9a3886cd4606804006551783867114ff9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "0cb0473924b1b1764e66ccef15f141ae22edcf94763cd23d1f6bbe0a398262c7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "06b5cef4fde073a804672d68ef6e4873626cdfa532be17a70a3e76df52c94f43"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163168, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "ba1cf3cced5d9244958bba88e954fda196fd779523870418f7d3e3a35fa284f1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "e1f1ba992d292a462bd2292bebdf66133b732b13cbecf7b0ba1bce9b7513eee7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156384, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "ac022de1d0686111574f7a477ee2c39ec1cf533b22be68163a88d9c82d3898dc"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "a4c30192b615c8f9d3df15aefe9e594e146928892170a4ffc19bf0a489e1475d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196792, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "9a0a06f3b60614846ae25926c9bca1a250f771f5a4cd7498f498716084f39d2e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190136, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "ac7d7b090feb8af110c5a0d0956904ea45e70acc6f675766a283dafa3a4cf4b7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 195848, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "e343cd982fe069c42ae845ddc7612dfe6506fb1cb8ebfed94b3de7e735691ba9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 190088, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "3549ea5b9c0f8d215ba632d4cd3d359b82c04cec5c2e22d32210d063e2aa1486"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "9732cb6b5978681837e62211f3e75687410a54c6050fa8016dccb1784c033c40"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "5257b83f72eb8166f7bb6acbf80d199ba276164cc0ee87ce4d4c81965d59164c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "3f669e986415c00708eb637dc6d5212f37b0af7b9e9481aa027b2c313b00d6dc"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "56f1ab4fea0ec942e8c25d1e4f877f8ef4251495903823fcbcc5051d029fb7a3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "fa33f8299b2df4c2f0c0699c5aa338190fd207833c0282c606131a18d6df2301"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "43a874a215377465e43adfbce1ce0d6703f99fbe5aa307e776a6fdb3da18071b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "12ac95bb66bc06c184242b532f199eefc898d4d37685f009561a3de2e07a4cd9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "600c633677fddad3216a57f18f9355971b6477999e71d1f5dac3fefb1973a7e2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163168, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "6d907a6194b74f0a0b54f2c4e87b15e6b4addd2d24664470a8a5a7fc127d3280"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "90f756d7e5ba1684427b40f7772053cc9c3ce78036f84f104542111678a4e65a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156384, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "0c06bb5e1968a15463bf9cb6ece81edc1dbe4f2f219271a84261520b7f0d270d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "1cc80396ef57c6cf616fbd38bcb819da258ab6ad772f0620705677da3d9c410e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 212072, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "f1fbeb608cbd0d522ed0cb4853c5cf050be484f48a5a43ecf1fac89582cabac7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 224440, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "5d1c4ec88c3a49cd16223ee9e7bc06ae013b11e58108c615255daab51ac3c4c8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "0a2168a4a6c023dec143b10c5a35ce1275731258cbc7e8803597334cc0efb2ec"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189032, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "c390748a175f895cad8d65bb14ad5a706fa64914569858936c4c555a619b8bb9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 203448, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "3a9811774e07af0cda29fa810324f2d8667412d3f0f06adb3ea07010a56f6fd8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "69a113764398e992e331e281297bb7537f8132a98e166a34a965be763a49bf21"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "4f0aa507c8a56a8dffce5c0cff7da56e93af256e6d070c66422a1ab052ddbbaf"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "6091753f12219a8057031448d6e663b3ed14218f1110b632edfd947b4ab39b20"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "8ae33d48394b9b2a4e8904dd595d53503d465ac733f4804d0020179dd9e487f7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "cdb08ce387bc7cb9c431858e568dd911f736e1c8b1a39fa988bb89deaf1b1b96"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "d92dea9162b84f702f8f37c525e02588790b4c93704c4417e8d6e0699c4119ff"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "f8cb30e1c54fee43b3b7797c07a93cfb322cabbeb59f73533ffc02a4126ed144"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "89e6c709bc741d5d5b470c43ff934717df20b6f1e802a356f2e7e875998bb472"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "b445ae45cd3a2ae4eda80105ba055f99ac393a8358dc7c760a183ed3b3e997d8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "4afd62890675ffe6469cd1071623e02374f13a328d990e1cec6d7e915e4b6c93"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "51410dff5396bf19865804c35b66e5d99d3bfa45dd1e05bb2d901f8dfc34519e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "5138c7ac00e25d44cb823156fb72fa534503f79abf785169e613e88066954d16"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "781f3ad4f439676af326c98a3823d107ad90cee0c26fcc594acbe78e531ffab0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "aed22c8d33e03157dd3943e8dbfee2d95b8f75dae8abfe68e830bb79e2417eec"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "b3cc92d9b810a2d5921a28591bb1e06bfca62063191c912aab916fc9171ac024"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 212072, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "8a502f7eabb9dd2bc15da970c0c5fa07b9d22842cd0ce9e517f0dc494ddb2ae7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 224440, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "076ae68dd623a418db14c8bf65ed1a827ee9b24ded5e81cd6536ed69afd8ee73"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "8c80388e0747317bc2fb189a13c798ac49419d6a2fd11b4d473b321567055012"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189032, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "d4fbad30c48cd3960939e91c1185608546b8d4565259f70b0b600e98919c34e7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 203448, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "bac06dd2bf1f11da437e8b8861ca3a16dc2b806372d3a53d9954d1abae79888b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "1d4e3cf136ce289b2161cbfdc251c10c1c71a855c5d460b4dcb41c8acda388cf"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "3b2ab5f60ef09df4aabcac19fba46c599bd5d38306c5d3d1edd12b52b81d5df7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "8b2bca6318ab079cd7ea6942dac98597f0ff5c6744c32ab9f6a2da09d1b71d08"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "631a67229bc7121e0681dfbb0fffc462a2c3a04e9495d1c112913d6c13d853a5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "3bf2968e5c0b4b8fe6d5c6bee08919c69f7be512bf1dbac3ea4694ac49a170b7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "f121aa7d196aca37002614eca5f5ed644f8887209569cbbee1b153b110b2b996"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "4c9583c2fb7934ba64f6eb34f50394c95521b6bc17292d7edbba187292bdf828"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "b8d13de4709b245d911cca65d22b1d311a93100b9b1f610385d525bd30bf099f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "a100714f111d86ac3d2c55b8ae22378054f2e31aabb742ccb08ee9a28095a279"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "f9fdc31e2a7eca6bafe612d06a09fe1550307677b60f02d992545fbb5b630d38"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "567fe3a38284df3b00c23776f99b3be3e819835e98db7ac514dc50ce11f72e87"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "36a152980a8de15b9e7bf2b0a7bb0ad54ffa1bc0f0184031caa1a803af3f9edf"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "b637a51fa87408772bb71294251d5de7eefdd6fde0543e3efbc9fdef35b18ca1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "3d1f999709d5590d22dddf608720b1e1cea93a6ea969b4f5b8097a42b301b396"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "28fd951794159843d76551083c7def1b8b7231dbc1c43976290d480c5479b9f6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 198728, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "9070dc8f7c15234e916a3d6e9337eabfddba27228adb588d47c5b2338b0d7ebb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 185032, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "c9e07ee35027f8e5853cc150adb8d9cdd8d637f79bb2ac7b3946fe4f6a97e0a2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "97d55707a0fd64d99e9841dc10070c5a99d30682aa32c47e7b96d2d62015149c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "4bd15aa08e018621aca774c7eaf2ed01235f38c8626f2c02792be42a3c662ce3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 169120, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "f52346315f3fae42915d8c0e1039200e80ee8d702fcd6665efe97fe91dfd2ef2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "1b75317a20257f2ea396d1cbc542dd5596443b4d361217c81b4c99a8fb0c5832"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 153376, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "136edc11c1157bef36b6186381b968494552b531c4916dafe433bca40459606e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "d0ad04615fcf701c09ca583de9f83d9b84bc1572f4aa52cd42e4dbec0f7e5861"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 211560, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "6997b07e3748ea49f8aa82cee1c1e69323ffb165f67dcef6a4bfe0e43af5f1d9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 223928, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "668a2eaf3daecd0df183eed790f0ac786de7ce38a5c20b0c3a174cfd166b85e4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 223896, 384, 2, 32, 0, 3, 64, 0, 3, true, false, true, false, false, "8c981cb9683186c852e7311849688a627bd536b4a7b513fdde1666e458324e8c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "a4ebfb5d51e2847b430d559ce36155202b56f96175c21f9fa0f7efaaf42d9e03"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188520, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "6f6860134c092a943173cb0b4042e77457da6c86475d4ac39e646276332eb650"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 202936, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "52c765aee84f58b838ca88cd0f9c581995616eb4345aa7eb4701552d051ddb8d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 223880, 384, 2, 32, 0, 3, 64, 0, 2, true, false, true, false, false, "1ed0cd0c0b29c911132ccc5032ff03c5c2da93047d6d49a0367330216f4b36d7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "a8dd4e14c29e773ae7358b1885166a969222c4acc3399a264f440a67e95fc32a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "9899a85132fed3337154d47b9a4c47c78215cf0acd75b8c1986790a2edaef474"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "d5c2e456260c9338ed5baecb8d8e9789f90cf0d454fee8228e3678dc03a0c98a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "670b3115a4387fbb0e6eb99c99d88192edde35bafd45a45c0a7fbeb17c30d3fb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "823f03619ebde7fdb7ce72edcb16fafef72b11e5c62d6f6a840654ec1a9fed9d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "0a7fd34e8510b42c6b3339deb1064258de4b6ec013b4602947d4be15eebba2d1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "de28423ad84502edbd92d0ad6da43ac5742b056e5c40cfc9a639f3eb174d815d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "ba988a580a156e5ecaba7d97e4f221c1c0f0e06f06c224b4a425c7a812b5faf6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "8d3639fcf5dcf9125b3371dd43d5fd4e0fdc2c118fdf8cd46a61a34606d387fa"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 223960, 384, 2, 32, 0, 3, 64, 1, 0, true, false, true, false, false, "afad89da7b179bc0683b45711c44222d43324ee11da6dc03c10cdce649570c38"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "5675e41a4cc002394f9afd1e01eab2eca539f9610e034be88178f39bc694b873"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen", 223864, 384, 2, 32, 0, 3, 64, 0, 0, true, false, true, false, false, "ca92727a05392077a4d6097c1b1816f4a8e681e80657dcc99b3cf814a51e565f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "f5d3337d185f594c7130ab38f41b0bcd2ebd9ecc11ff2f8add553af386b2ad99"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "14cda53e62a50ec3a99757164be9e4a9b318da935c9398b9c18f8e216e51e0fc"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "692baadc914546a7b8fee5e31b9a32f4fc59448d0cdc47d94d38c357fb22eeac"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "5577432dcd7beb0c0b83d8ac6a13cd3de435d60bb253ef13a704fbab47230c11"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "4ed6e6c35b16da140b8413f480f08e9e577a799971b9afaf98ebfcc915602f95"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 211560, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "493681c19dafc6399ba77eeb94e3d3df2c9f6d9116e61011b6399c2982764080"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 223928, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "d1a3df69b11f820451b35e88afd740ccc5408c106840aaa346c9376f182c14f7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 223896, 384, 2, 64, 0, 3, 64, 0, 3, true, false, true, false, false, "2accd385cbf5c6a10b47d486d054e6921fa39512a1672e9074c198da1691d62b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "723c2df955ce79d119109df43e590625f917f2e4521efd649b3ceb3c15f5dd07"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188520, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "13a56b066d4a554c8128b9f89e0e06a10460ff6f37f6c47d85850d6b1624a755"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 202936, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "fb3fe5f0f65ca2b71490c20f0017d92ec43bbd6bed413d69e1d913d5ea184571"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 223880, 384, 2, 64, 0, 3, 64, 0, 2, true, false, true, false, false, "ad881f1d49aa90b5744aee8ef0fd8f953a79b7b0ef4af68e795677d53a32441f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "2d38c0573b7bef793ed47ae3fd4077c926bec88f9c8bfd5cf95e1e0a674fa795"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "ce62f7e9531fd43fb61a7b301d1d73f7461c28f0258bdf83a47f798bda641685"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "a34603f7fa9a5687abcae2a0d69ed95f1821bec4e2d620f9f5005bdf5fceced2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "d0c1c4066b552430b4fbeeebe375bb0a0c99a76e093ed119ee19c2373ce1cb78"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "df2550e2acd1f6d991d6155f3e321c0c76860dd614c3a79fccc6009554fb6aa6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "90b8edf1ffddd889fa3fbe7a2add91057fd57e6da0792d73ca0f2ac96963ff3b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "8b13b506f3c0e03049900bb0fdf7945a5686c6e761ac68ee857517e5e714a657"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "e534c743ebb563d7ef2a8a4c1041bbbef32805d1404049b3944bc0d9c49df375"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "e619a041785a367d219da068ebc0ffeb63367993eeb77740e3424476642a0fa1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 223960, 384, 2, 64, 0, 3, 64, 1, 0, true, false, true, false, false, "54ed9d3ed22fc8287126c0f95132c094e98e939a7d70d3706ad4961b699047c9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "a2bbcdd6f7fe1ae3193916e08af8c9c3a99577e26b74262287dd16d3f9f0158e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen", 223864, 384, 2, 64, 0, 3, 64, 0, 0, true, false, true, false, false, "cd0997d76167fbe6d48d3825ac52700bc551eba9e955c63eaa207e8c170f88a4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "c8ed9cf31e2a5a09567168830a6bf536fdeb5ddb5db681c87bf6687cd8407d2e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "05ce17c65ce12180aea192217bb429c78fd66f432fe3057662dd1653417b19b9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "af81df967abb91264dec666db87c903d7cd5907130c6807142f3632c3fed2a00"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "210d255ca3c484a2b5f89aeaff646e9454796424747d1db0c186689aaaec5769"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "ad59f8494ce298d04c534bab615198d188e7564568da587d09fcf842d0e13cae"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 198216, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "bfd285e32a4ba14f34420dd69abfe6f37549d56bfaf6ab37de4833284cf8ca5b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 229272, 512, 2, 1, 0, 3, 64, 0, 3, true, false, true, true, false, "42879118ad314148ed197fdabea51333b3c514c1af1ffa3b5b30071b213a27fd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 184520, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "40bcc1e68cb205381e56e87524e8f41efc3814b1a0c9e1d0acfcc6a752e5d1ea"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 229256, 512, 2, 1, 0, 3, 64, 0, 2, true, false, true, true, false, "6a147ef279d77ee7694b5640ed0be6fa6884033c4463330a9a3d012ccaf953ee"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "7669c66066989fe611d8c97d20bd3c433f74f3e288e3c5b7a2432473ceb6a782"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "4882486cf3b7909908416a576a41a5f786a5dc233681d5c8e8d2e2efd3c5de0e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 169120, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "9c1908d327275d06fd8213cbe065eb6e770e7fbf8637a5ba22999bb139d3acf8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "659c2d902e43e5f4f826723a1492d5efcc1bcb813a46fbc4a833657bed34a993"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 229336, 512, 2, 1, 0, 3, 64, 1, 0, true, false, true, true, false, "e3eb7e199be5981cd7a49df00585e623e035b742039bb0770d3b0954e5a12e5e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen", 229240, 512, 2, 1, 0, 3, 64, 0, 0, true, false, true, true, false, "a3be72cae6d09401ba9c216aae796d13f68683db112c33753e6baf8114764c2b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 153376, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "0e98210f5bebe129024da6ca2ea6e9817d01c5362e9abffc9ecb965b7df631a2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "aeea376259ef9343b19d299f66a8b8416568855b9808d209fdb1cf8c840921e0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 211304, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "53be962543f309c0ce2ce5690e777e535f9abdb2bd21ae50c4d608924f3e018d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 223672, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "1333527c90e08d77ea0c23f5773ac0472d49bcd728bfa943370a207a83dd8875"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "0265fa1342655907c498b714178e4cb615d0534d4d4aedef94645b32188d379c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188264, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "c4ab55d4d309e260324c967d84844ccda83400d21f56359e049cf243d3d428ba"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 202680, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "5cbc5eafc412c46d38bafef82c64a3731592818dcee6dd124d6f0f89fe672a57"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "8a9f962856a8b7741c978858a978514182b6ce6f285b8659ca4de5955a52516d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "ae961db862686a0cd79be90755233069402744c2c98cf454fc3fc255dc225b12"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "029f5f1090c68b4f883e626baae2f47682b360f0eadccb0f7948f9ccd6898235"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "d5f90a5f967e7c2d52b8cf45ca993286a9341d7e3cc7bd9f42690dc407031c32"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "b98a7652aec0ddaf927ecf0028c8643376aa67b545977d7d4ae29a8b388f34a5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "e4c4461fec6f46ec7582ebf1077ea29aa8612e7ea4e1e06c5e1f1731f620ab29"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "139c79ccd3112e463a6dc44c3e98defe98e08e52bba3232ac4bda6248a575eec"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "a7e6a9227d214f8c3eff5a5726008c3365a7e766e6ccf1cef0791a39d02f1e9f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "6610419042add5cb0cf7471f3995fc7f7c47d39e6e3b4de865fc17506494a976"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "3f8e7dbb6e06eaee8ae249aad55d34d83f5cc1f04b6683851bea52f73174e38f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "c02f8ac0a115a18a7e0b133a1b5cda2226c97c710459a8efbc259be4297170d9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "be99226e34bfe2bd2e9798199ebfd8e9586fa16030a991cf0a0aea8f27d4fe11"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "dbde78b45e78c16688073af1d975310a68a7e09f22e4ace57bdda671fead8432"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "ea3ddb40d189be5925aded5310bdca70e24a29af8e8783d7b2cd1432969e2464"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "0839f86ab76cead728e6393fdc09075de9292e3b44f892fee50c80797c212afe"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 211304, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "7b19f84b21677a244c86a9193bd65d682e8d04d82e5d224d926820d4caae8305"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 223672, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "9b179cdc79eb285e37731f447376c0ca57931fa896f2b286960e852e4deb9c7a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "6250928a4393f88a67c757ea8e7b00ab99eccdf42fddfe4639ea2bb91f5f8625"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188264, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "269abaa641dcf8316a2e85bd8c033e53c97b39d4ee418edb4b4400977d5acd7f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 202680, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "ee008da8e1973877da3b15aafd3192cb329bdddf05936c80fc2c9ec2a9ffc696"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "9c8c87746cfce8dcc539965a1e44de933941104f4cea81b688359c4cc2f6a1c5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "33f92cdc91150a5903f981b8d9d1982df85270040eabde1ffcf2cc02fc523596"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "832bd7db62ab991b85240673ffba636324bbfae6c58d2faa68d67be2517fd896"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "3103617a7e1401d39ac7d6508ad0541e2111241421a0021f046a973a80b798c1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "9f7a2fa64b7499437210f173681c07d7e6bbbcfaea782ed3ae113539fb979e55"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "df3ee18314d9d10ddb7bd36609f4685762ce2043272028c80aae7a1a227f1d52"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "00cdeac494717efaf8b99c02b6ed0e06cab21bec6877e7558c28715eabd1da9d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "ac5b22acca7045e393553bce26bc9b2bc2fdc007df91027a09378673bc009983"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "13c971ba3960986c6581cecd563b41d8e42cb530dcbeacd1962f97f7d5bdf69a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "ea08c96d35f85796ef5b6df882b1f83531b42774c03998e119fb64c14f0b38dc"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "71cc4b309049182e574575c537e1cd364e191abb9d414c09991163e740c6aadc"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "f1babdae8d72c7dae486612a3755a7920c857f1e6a4273c713bb08675706e94d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "a9e9dcdeae9755e920cc1bb759741add40d282597174d7d3a2e9bee1b64f60b2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "7da4412c831428c321e9fff50c0377a3680122445a9170f98b856dea4bc14e65"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "5690cae0d84a3359b761d39e2ec361b8ddde42876a7857e502cb16972eaf1d2a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197960, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "6ff593df4e332332664b0c985eff2b4ec036514719eac5f4c569535b4fc58a8b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 184264, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "8f7c4ed1a13cd3a5ec0cabfc3f94d7353d2aa1d68f1491fde7dae7ee43be8d0a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "6a4c48a0158df0ce403c7d52539264c81970229662c54fed1cc93c32f809b138"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "157d0296b89313d61a1d3fd34bc5276a311c8c19041f60681b61cbd0d9df6f52"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 169120, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "c2d611f3377c6367cebdff2bd7373a9e7fa6f58dbad33078c35d7a4d7265b563"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "ca8eb080bbfbdd5c80da9036980fcfbd1ea71af2c8fd6304f2bcf17bc82e70c1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 153376, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "12ff2f48b0ed25a6f6f1b336e2147ae0e101c5081d74e1c85a391c442c440d63"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "aef5d492caef6cf2e0b6a47e29fb29270ed4cfd199df366c22bfbc4f10b91c26"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "b3e89918d1752f34b58e54ac74380b2c505eb1b456b39bfb63fafef222473f8b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228424, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "b3c55a0605eb30595b9465b06adc5ee3d053a1aec21c691b27a2bf65536ca0f0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "b7b199b3c84b2b9708ec31d738b188c3bb59f570644d0f0256028fad91d9c1c7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "98940302785e2c5026caa59ffd81a84619de32ece655668d3bee99249c1de857"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "a0191f33be09d879a76630873e6e66f8ee3183058c188e561d36e1a42fd40d61"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "ea4f9eedbcdffb39681a47739e255e3862c26ffbc2cf238df1ed2e3468b3a6b8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "e3620cce79ce0ea7e01152a5e5f713a39ba518a1ae2bb63f17fab8fc7e048b9e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "57d855ea5935925dcfe1390c91cff69929914592f1fb97ccaf6f814a35ad294b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "5b1ee90ccfdbab6e39e1ea896f756a33ca85c540654c3bb5a52b72d44d69a44e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228424, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "c939b72477160719a3f3bc34b915468d40a8adbe979cd9d3025456b221f31927"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "faef749ba0817c398486c14f528eca3ba6c54c925dcb17e5454691bbe4dcb677"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "85804076d46985afca3e181534c1feba537186439957cabb4b5683e397b8c47d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "9d7e6ece71d6547327f69ae9434efc314141b4a7691fb4a8eb39745f65bc0ccb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "bbb8619ec41ce2fb842906ad4f036c62fc61727f04624b4ad5a5e74e6859154f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "22549ce9a9b92d881482d6f590f17aedd786724224c1773941fbea873d8de278"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "13981959261a16d5618a80f1187788b7ac4f4736c48dae62a0367eb25accfa07"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "6d3accb550da25a854c8177857c6642f2f5d09cb2d3cfa56f650e8b4c830e6a7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "e1ade90ccc2ab5528e415136c4f026605f4a6922fd0b13963dbb406b6c79a06a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "19f7fc0abe5862cc6d40e689ab55c8f31f9c7493f713c92914f59149dd8f1a16"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "8923c689ff3354a24cbf068d326d96a9915aab1bd66ba03a03b36069e07d0319"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "d1b9a52d31086d96e8d464d792e3f384a15a42fcda8f4ec6181e152c622ad3ac"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "eb1141423c1ce2abf16803d8f4846c049d00fd50e0d86975c53b9a4acf482e5c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "fd245ba9e9cf8383d30c4e9452305a3e1663d73d3c9542c1137a3e39da55ed32"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "2be2a49e09917473082986406e9f1a972d9f37d830e17483998013f073c1bec5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "f9979b312fc3403c1711bfacfacad4c9057148f12a10d0098118243e00288815"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "038584fc7194236686f05be9e22d2b362b849b833e05a017308e5149794baa6e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "aa407635fe4faafb656bc74ddeb074efe913a2ee4e186a40af6aeb71edd97a82"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "e1bd79ac1c4254104d810ed3b02c046c35412700afaaed563dc9a7b37c71d894"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "7e35e9899a5228562f180450ec43bf010ffe29093462855ede0edb3593abb932"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "4ae0e8921c1612457667560f1b9d9292e6bfc10039591a722109cf58d3506709"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "6608c7c3aaf9346b264fda5f5c31d754791c8a7df181ab7d31fde01d687e6a60"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "8b12aa42dff08a04f8e86046a64d360d9b7904438209ac1a29631c3ac4140ff4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "25521fd25ce5fb574b2c8ed535496857df1b2f596361ce128b18910e2a1765d2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "0fa0a462e83f48e3c2c7cdc789aa2056d338b6ed58ac28c057315cf46f2616ed"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "8c42a04af195ec517c615b1b7b6808192eaab5c97d09c0b18a6c815b725a4afc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "6f52afe874c9c1023a3665bf82b0873d0835e3aa386f1d87dae11f8bb1ee09f8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "b3ecf854b42e106e8d2384e663acbd3b9675a560a0a144e7462ff985aab35c7a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "68066235864bdef1b5e89339e0fb9222b74672d2ea7ab09a1d64daa380f803b7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "e782a5bac913d2d963dee947362e860e183dba0bff432cc3cd7b818348376a73"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "8f02adc15f91df650b85c7e3b466615553f5d77b8788c82c92ae38dac5ce802f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "923753f49e803c420304053322c9b247f13c7e137b1fa1c9d4accfda46728064"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "8bc562528137c673dfd262ad61cea5ff6e1fc165e14f93b16cccca6c19d84be0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "5e66ccb878583bbe51395e9bac60549c1518a2269f37125b8204e1e6b7b93f75"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "98b3922d13aacb13cd9b9b44f0445b64efd9259696e1d850658d4fa5293d6dc5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "30c48050a0805a1d92390b2d24457115eb3ce1ecc43836c85118b5b59f1d48c5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "d18c34cf97a8bc9859da3ae31d0f9c5f9ea2f6c7dfda92b512729ba278b147cf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "201739eafaa4b5901f679f7909866baf56b0728620a40d994a56717cce8b03a2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "c7c0d0cd2d621bd594df8b6d93bf79e7c8033407792529ee09d3ebb70bb2f6d4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "a645309a347427353945c5e88f880084be8e334abc45a18f4b987e0e6ea8e431"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "7aa6f4885442a2bad1d1d141fee612e097ae01532251c408d161d6764fa960b7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "604a56b2a439ec03b7a684b89bb2ede3b012e55df6d5b444d680c4ec3cf44e65"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "ab3bb014a727d6759d7fdb51d61cf7b07b9cc60ca3ee954c9824c23b13d1a4c1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "31fb0aa6a6e28a14c565bf110db459b5d70686fbbb9e5619c85b6688d27fd2b7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "35fe046587afa210eb48ccdfa6f81e2ec8898e293df401bc61dbaa82e672293e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "8523c82dd69bc6bce59b15ac44a47f6ca5e103361cf325d18b69ebe727ba6ea2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "75c7eb8d668a7f53577ee6132ff312fedd3dd9b3fdca804c4e79757594f87a61"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "4ae343b8542d9704ddd4d9a7d4c08fa91c01e6ad57b09acfb364b292ad80497d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "92716deb13ca1f163e65837cdfdd67fd08cd08e21ab615ff7ad7ef6a0938b8db"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "413b5970c9247dcb43ad805ee1da929d7e5f292f5a42d044a8e449c54b2c21f4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "f60f66cf8b60a33c56c1fdb727a69a2e0c567960ae5b6b90ab1866a958fe9e35"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "31454b8591650886c9f3a7d52f0d86dc325b5df971f276d4382a423bb8cfd15f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "3443f0935a75265dd6ec8b824b5e92a375fd10245c51cac5e4c19bf28278da90"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "b5ecd4736c4cd224f15c072965888fe9b82c4121b356cb8a5023b35405f834ef"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "299a28b0bf27e1e732931c8698796b1a3913ce9f9ca266cb33cb736e0ffe9ca0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "7443d8214a4521d6a4a84a0dbfd195d7fcb6d485133aa4ddf9befcb810f4a2f6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "818002ba0c6f358a83f87aa89c13872dd961c61cdb94026657de7fa51f0948ab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "00206e939f742fa6021d74c99ba4d4f58e3c106108f35441b4479795bc74a0cc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "1b55953bb3ea3063a628b24d0ecb6b0710de89012e40ea3efdbb54585474f2a6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "b1522da0458ecc8488bcd2e97a35c8c8aa6c629cadd2d290a659e72d3394cb4c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "e184ace6f2e6bee228d89f876eace3da74e44550af2d9caaf34a74156c6f20b6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "ec7b2533e9368fbdb27bd2f664d8c718578c264824d602d58b75b1f30e157d60"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "cc05436985435181db0f104c62865cf51fb2f7775d0e251b9c801b3b7f9949c8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "cb331c3abb0ed339f79017994594b31caf277d0e3f0f3ad53830a97cefd7e0af"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "89a02c0cc89e23db8ca7924ef91d1a135aaa4868de7bce95434de8a24d3dcde5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "9521a5529d25360bfd28fe1c322bdeba5198c8f25ba45f3f6720b833517ab5f5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "568181e73679c76b9c0fc840e408a850fdad7f6fa718f92f62146138917be43a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "5d956adae72aff88f35f654b43213563522519a189da381928c0b3b070d0224c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "69c5e1e849222e1981f0fdc5589ebe4451e616516914fe9bff9f989859134ec2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "9be0ef81c390bfb06210b08f03baeb06716946501cd85ce91e26fd88933fd775"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "fb41491f750fd548eebc60a1d2caa9735c31037c71c9c56aae583d71987ed018"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "367f4785a2de32eee46dc2bf77c07a8d82394a04a1fe1d2d9b81787b2908181a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "b4cb7d64fe107c1aa87336ed9fdcfcfee838bec90aeddde2ee59a11f8a4304a0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "f782cc517d91c913ef4be7a38bdb158afbb74da753501346e12ac1cf68ca53db"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "c087fcc5106d27cfd171067f8ed64a10b6598db0c30d9e224cbf40d0255eb779"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "54142a0634c228ad83b59f4d69b864ead374bd161290efc42923f18d8dc6f7a8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "67bee5640c5d8b10edc77edd8cee1246cb315333e8883facc24b06dd07d32551"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "7b27629ddda15570220b88971f85350adbd8254122ffc217b7ed7db139011823"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "a1f69be1192317e7aa6354bd28050e41f75d644e4a0960c7869dc48b5fd806e9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "37eb7f23ad2f1482748e87fbc27b4bfc098d8b20dbe403c1664c9a2d65ae0da9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "e2e7be7a013534c1785de1e4eca3c252501187d78c48b7f1f3dab14906dfc0f4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "aa0d0e048dc4574f6a4624cc6f7a17b29fbe71830d84a39d1eab790d156ae96f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "576fb6572913bf6030f2ebe57af2955b1c06d7ced65b10ab13d15e48b7606097"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "0d3ffb382295bd1c67a920ce6f8545bdfa016cd9d2efaa1ea68a06eca9c0397d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "2559d057f01e52dfc5a783c2b5d793caaf3497b45074595e44a227ac083cae08"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "a20537133df7ef874a8ebdaceb040e9a88749c725d15dd3e9910cf605bc927a0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "c47a24d042a6b6275856496f53618d2978f6447c6aff311f546dd496772b0ee6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "5e094904ceca3b4eb84582defb747b1783449ec571a7ec250930a4f600b22006"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "678e75c8cedc1e97cfdc2531ea53892a7aca21992eac4648b1d517705d2af4f1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "58deadbc01a64545e8b199ffcf96c2a1dd9b6b480ede4919c7bd77dfe2ab921d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "f64b63f6e8bfe9679a7d503723ea9c0f38124bca8110ec5f6d97f94eb792aa92"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "559eec34b3fa63fe1cbe2e5b6903197b6650aa8fd91c20d1a9ae026331677321"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "303872ca429481d93d892793ad10e3eeadece1c52b00a889af76fcc6bd7f526c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "b4b9156c7aad24256cf2d59294237021eac8ef4fbb6b0ce69c963c7a5ffeae2b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "e8f2e8e0338f3687580cb741b256b3db05e4145caa59a07d04c1b926458faf3d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "b3359b008ccc3f047689e898f3afe015c865f87148e07433ae8d8bf6d212937a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "ed7565393ede836394f85a878696ecfb77c966ce3af25533cf2f066898353f4c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "ae42747a0fc6cf87bbe5d07c7aaeecf5caafe55a8dbdb4e537a12eaf0e7162a1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "b50d063b7a70e06303000d26ea616546a0dc00dd7d5d0c6ea1b871ce46cf4982"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "fbb0c356e4fa6091116e3f7881121b2e1eab265e1c46f827c7921f02ce166e85"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "700999cb5e3e1d7b4cb1f92874debbd929534e1a52454bb09bd7c03b0b856307"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "1f841226edaf2a4d8f65e1eebf40343fb56c5e9b13b5387987c475b48875fc33"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "bd2d9e2dc5a480feb05ead42887c1e405c0b4a6c749c01cafd4df391ee04fcb5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "6ca262c571bae083c34fe0b66d23f34df4b7dd9d9b09b9f47bcd59a5f2b16c9b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 191832, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "1b9899f9e8546af5074997fa6f1544bd92b2d50e7ff7adebf412271e052f19b8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "745581c0df3616a5cd97c924c7c7f7825f486fe9a6ea0c8589f839f3befcacab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 157008, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "3d4ea9ab35b2a118930c45432a84a2b06ca2f9e7af60e3e844450d1bfd8a2f2e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "cad86d9d14114ad3feb1ccacfa3178e6d3a1e70c0d71c569c73897a7446d54f5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "d44a34e99c28f0c18fd712b97f3cef98cb8aec8eaf4c3b990a400cf36604a871"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 165296, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "2acff856a5be93d90e477391cbe348071619da0978ea3d5a4503ca8b6c959869"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "03e9ac42baac1e4db22b6d8599fc8978d00792b38cc3a3acab43cdfea1860d28"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "e252a8ae98f3c29e586030142b4842214b0da25d3ce243bbc3f35ffad0f97598"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 191832, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "42c2364c1bae1d0735df49564eebc09f13ddc3752f47c12f5f859c835817c6a5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "3920be03d5d60f7c4f7d1288cbdb87a14529594129b48d30e6d06059b70d1a5c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 157008, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "c0ef94e32c29c4d2752fb465871b6019649dd8d5fefd7267ddd1b5ba32e242af"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "0df4fce56def61a61c6bcbf3ab5e7b73161a0ceaeefb40c9af2438556a979554"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "5209f03c2a46cbc905220327b9f266349c864b4e7494e51a1e09f975c6b46312"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 165296, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "0e720faacc95958b3671e9f720c59649f3256499da4d02846ba2e575dc3683bd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "4521d117bb8b46d0132fdacc2a54488d634d467fbfd25f4647b52f200e55d031"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "d236b494b27ef37b6bdd42b11ebe721d231deb2ae715b6512ab10f1d14187b35"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "cfe79c14353965d4476b854733bf47ad5fd7a8aa0f6c7632f0f261daf9d4f63f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "428c6ef18fc32f2ec0dda1dea2b018638828d37f41390204a05ae265cf203f42"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "369554996ffa699d6a16d8dc7bd150a6749134146fa9540a5cb7cc81a32c9ce6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "c496333e9d613c2490695523a12e16b8fb6f50ebc190ccd95e4c22eb724f0d8a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "e6710143910455483f538735599a39cc0f6ac7b10db95d456f52dfe3cf60a2a9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "74cc0a71e2a3c72da4563bf697642a665985a7fca914f2a0f6a154686cc59448"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "eac04247ff36b6a66b0c54f649d7863664143742115dd3e3b48026d0f89d2bed"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "6fdbb6a41d1fa64df83d2e87c4d86d48b0b3dcc6d0403344fe863b853cbf39bb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "c383b40b1419833d3fec0a89724c18e3563432a478537950ec68ef51f8c9d5f1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "f028f157fe5f2500ea52cb11a660887449940aa5e18c3b535d1efe469facc833"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "7503b15388ddf5010a0ac387e8c8eebfbb52e240808dc54a2d4b62cf0669ecf9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "ecf43908864ad0e47fc91dbf0dca1a9eb9879f77d0c6d8e660a6d18b910b347d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "12258951d80d397c44ceb10e79a7769087e31d47da1de21791209c656f1a6119"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "a851bf6bb94491475fead91b26310da63b47378dfc7ff90f1868e38b16e61559"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "e8e79eb0e7e31250f25486d1a76c199f65c872173c1cee0942cc8bf4d64dd6fa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "872511d8fc58ed473bc28147bcba01f9af698a32819ac288af068dd8fa8b6afb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "a1562203996c19fa9b44fb6d960d1d55ccc936ac2d0ddb2b0c38cbf30dbf1992"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "afee77e87d2e851e132b973703d1df8a73947ed77d7cc4fd04e2274dce474a6f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "7f360401f71a36b572ea0da4a78457f3265563f2a2da4276f118d0cce260cd90"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "92da8036d09e2229303d33a83d8ff8e43d802c8daac8b8b3e85d61f327aac342"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "d5f22ec21f0ef3346034dd6b3ed1334c9727a2ea5fe3c4b060d271cca4db119a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "a1dc6210df338f1f67b11d49414da1f34ab1be869f81862902899a3f760011d2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "a1f11a24156a90a59b6d5a9409271ada75e12a6eb07e3203fb04d0815a971ca2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "055d6c477c2120e44c33fbc696b80a55ad64b212c2b07eb24a6c9071b12ff440"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "a01451c32d50d7fa59b62ea6d33eecdab29541ec66412f8e06af06066361ac23"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "55055b410bf25c1792f22bafe582f35440cda7b2e44be96ed25173ac7dec2c62"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "120a2279be50366fe4d7b8150046b12947692fc7ff363145982b6ca429b1e863"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "d7b42d8f761a9c28c20055591d3cb4dd22df6e70cd237e2805a508897c110bc0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "6ecd595f0dee2bdbc870c718f8a861e837b8651036fb2dec996591f29133f485"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "827e888918a06e987db2472a0e62e57e52593fb47c369004d03eaabca59725d0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "c2c97fdb90b62c41cd5c0cd91723d1597b8109556e21decaa305ed09ff9378ce"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "167d559efd43c8d20de45ea5a397c27115a04d41d90c6ee37b2bcc0734ada714"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "c804f351d6729c8593ca9c9157e028c73f3afeb7de8158573d475f834fba6575"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "050622d11d500eb141706c5fb0ef30e62dffd866582bd9745677d679c9e5399e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "361cb7f2da56761e6c0ca875b775d68cf6bcd3bcb9b6f9d1a7deaaa236f124d5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "295a068290c438959906fd6b82ecc6f4b80f48828f5300f54ca696173ea91e01"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "36dc0cde9bd686a982cca860c61b2f2d0c868b452f33b0f9e0be15222ad46cbb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "5f8ae27207ec7062f0fba6347d0e66561fa7f5f27f10b5d95093ac3de10b4c00"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "d856199f184b04ca63dc5d557e83fd222841a163aade90021a0f81a7bb8170f2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "dcfc06c0bc9fbf1e2386e310a64793abc205d145a94d7b5cb20d1f6bc97d7c3c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "096c17e74fe7702001806bcbec97e1161efb23ab232c7dd2db5b56eb21991dd7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "d98d5ab0bf1c3096135fd9953a87a177c2b1576bc0a0e9595b6c3b2da7bef285"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "cd111e8840ca4c2f2edd66039a32e500946b7105f92ca251f115763a30e10219"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "251b8a499c481fc18e33ea03d25226bf4e387f4dcf7e57162dcb516cd41cba04"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "2937f24aa032f784b86d22f910b50209bf71ab70fbe260b98d327cbfae66e4b2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "3a018cf0828ae96b5041246e58408f33ff1ee9fa55e9c7af64ac247dd0ea7d7e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "33a23121d5a1da2fcb30bf46419840a93be9e46876ffde10bb3a877bacce9d02"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "7d9df71a708bd16ba0d5f9c46772e3b56dc4d964f6b5fb26d3e7dd260c025864"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "1edf5ab6298a7d49028589040f9da7c40b8eb1f9aebe44f9c04ce2f6f77032f5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "85979d40e7d2b75a28fdbba462dfb8dfcab0faa0ee430497f1d5469d7d6447eb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "9280c9cf4a5749e3f9d2e72370d93cc47d60cdf17cc8e8bd4d3e919ca8a9efd8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "a996a36c2c01bc71a3e8fb7cedeb3e1fc09a6fc5afa6c1a2d96f66e17dd5c213"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "6836104a1dd2124047f077fdb7113c07983e58b4fce8a8bb403a08d1b2b30f7c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "a77eba1d4f4fc812ab719ce18d811c1fa2484bda1fa077b5ad7a6781480b107a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "357603b44183fec7071c8a902f05fd9a95710775c4a4f2b6b6712c53eef721f4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "1d9c77044359fd2eedfe392c331827a62b7a1f67ea4374cec84e15df38332cf4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "6d1d4d8f48d7381eb7d6faf8dbd2b069feddc453abf3b08fe8d7bce9652ea56c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "5d32e1869848865d1029f87f13215c4363a754125176c66ba7726e3c597baa4a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "58a39628671c09fedbc45b53a5aaee61e05e8fc255ea7fe5a9b175b561b9ce8a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "2127094a3cbacc30d9f2a2e3cbbec8c4d641c20088a2a5720883f75be7a0f155"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "c3d7187796213d5095d07ebac665854fd8aea9db19902fdfb4c24145cefe2743"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "ff5c409dea4a027cde0772549dcd3c60f088f7c66acdb79f3181d2134af958f1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "e7de36e239eb8a8b43c435a08a87246a44e877ba6a5dc075a899662ff0c0d0b1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 208056, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "e74a04b4aa3f1ce02ff98ff001b95d81a2977f31eb673fefaac2463f2d39c411"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 208200, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "58b2596a55fbb7fb4390ad5e6a0ab0dab80f6b4d6009f7f65a50a0c38ad24568"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "c4108874965eb6cdd72d9b968f3f72b189d3720ba4373fa2468bfdae616a7492"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "a536a08eb3ad5a948caf72022fc12c953d922920c2031823a52d2f038b31d2c3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 195400, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "2adf79d440f1d0a1bd486998ca8b23c78407860ee9e5449e9c94a6adccf3f9d5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "956e6ab878b92799e301c60af791212edc3245e8f825579e9d0422f7fdd1afec"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "a1a946701a42bd3afe71ec231278a93bacc3eb9678c51872b2551bff21dada0a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "5c1b348585865466521e4cb37f57e178981840ed108204e75441113e80ac88c8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "973fb7d1da0e9340b11ebfd89407700cfc2369cd72c205476fb63c562f727686"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "f9dba78deff14b3501cfc8e3b0795af1d908976debbf5840aff27ee948216c82"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "5e573a2af6cad07aa0a82ace68ec8af90eed16fb8317ffca6a0e87324c8f4b82"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "ac27c16c3ef3d029303933015fa625071ed02692f1c8fb9ba61297a617a8f0f9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "1884fecee7110c92ae37b25dff1e5dface21978ba32e18dd6812808cef463df1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "5a7ed51b2d5c2f058fe59c0cc97390908d542773f3e7b6ab17fb27d41b62e07b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "ca432d8dbb5b7587d72512d61673fdde431e27a2e7e0664cde424ac95aa2221f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "bf59b39c189453f61b9f7d1439223b354ee57c16a6c1dad313498ec2eb735614"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "ed4bd998841148f76d31da3f4dc1fcb4a9bb027ec77b5e9250e3d0ff409b2171"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "37bab42522956a6b664c1139021f14ce6e65ec5505cf2115a366d758228e113e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "d03db32e8e56d6f529dad7618d7935f984710680df398be93389d546e0e5b3ef"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "d64c27534f99bef76457941390924ad797be6a4aa37010cea99539497c63468a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 208056, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "089edd209ec1daa057ac272b1f6a2b46370566574d79340ea7992284ef4bdd4e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 208200, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "51b0f1b2fc777b67e42625c780a38f1cf9c3ded99d76437d378e015ac0d89fb1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "03e48d42eb57af25f1b000c883a74d9d15780cb218c7fbc8f0da8f28bf3963ce"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "d282441119b2f5b23766b95dd996036bc19913b87afc222b42134fa9c2ee5c24"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 195400, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "c64978350f04ef4e50c87f8b9f3dfa7fc08219e7a85e233a9db7a6864625f89b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "f2d5d65eca30cc76740cff5e92fa6331f2f0a52e439f6c431c4a1996af4004bc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "814b444e529457ce943572abca85111b5b8d43fca995b3cf839e77474d5a8d90"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "90a1bb61365fd8f61fc6b5eb55d58eff1ddd54c8fa73f5798da02756b3f5e89c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "16b12973de1eac9113ee9c27a01731395acf73d6e83380fc9f89f1bf746bd99b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "df05a4876db4bad1a50f28f778cdc895ebf4fc612d09d7f6e4140888cd0bbdff"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "f84a7caf1b254dc1d3d86a6ed1fb89b1d340a3d32507a77143a0f797c5a17b92"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "2cf3edf5eb2bf416fca113f23b50775cfd2f25c938f255696912668b947c5fd8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "cc68c070973e886050977c0752714df1dacd8e87345199903bf28a7bc3fbbb63"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "3f5428d674f28a58e7dde1082c7264a448d50263ae99eac6530fe8f1f298e240"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "19bd8771aba030882e104f14cb7253936d0cd6f04449c10cd6cc83b90a280914"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "d51293c3cc9aa1d66b68a84b79e57603f3653418694e106615d825a9b51269d8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "a1e22485af3db3f879bb5b14c34f32ff80028f7c17c8544850f0d8015add351a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "c66b97bb31784406d9b5d652bbe4d33ca0243d91858e0efc733fac784eeb7704"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "08326d143f3fbda01e163228cbd2f17e407b541374c108095e89fcd19f59537d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "b880f6a96ff8964f6a6b72ee3af46c409e6db26ac91fb64d2f5b5d1f9be76edc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213160, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "de4b5fa9e6ad6690c72e92c0ab03fa66aec113a21712c00c3faee67fc6d7e23f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 200488, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "3a29ae2872cab85b3dd87095ed02426b961c2856ce68dde990f61f0856eeeb42"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "a77e90d23c1de792463fb8f29e495518c45702210549c1b7ebabf9c1e7e75d51"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "4c36ae7eb4664e3e914fc5eac9afb5ffe6ff277f0c84a08965641590a1df905b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 183552, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "8c807c5fb924aff06e444f2e6e3d7a1e2fe8cd0af5c7acfb0f43215fa5a72478"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "d02ec15b389f73a4713bc095eebefe0dc1c42a1d1742dfa227c61ec4c65d372d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 168832, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "1e419cac4fd083d1976d4eb7dca2fe1f132ebbd15af1ed9bcc5872bf9f73fe15"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "3047c269b7d9eaf63a2f88a7e7b77d0cf71157aecb51e31236ef741d69df892b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 207544, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "0fcfd2633b211e8ea47c8a8f80875d729ad05b2158b5e06c2ce24e6860613b4b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 207688, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "a1ecb4189f7c543c8a396c5f0b1a4679db40e63812c1585b85b0b2bc368d82fe"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 207464, 384, 2, 32, 0, 3, 64, 0, 3, true, false, true, false, false, "32f62922728f26a6c6d10f31a673ba4351e0a73826ba8284f50579f31d7c80ff"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "a915ebff2f92e2b618686e2d602b658e21deed2fe22e8b851955312122aa04d1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 194744, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "3ab4ea3fad45bebf5d171ff605ba94f9ba5f3703447423d1d2c6f4f36c951a62"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 194888, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "6c19386764c20131e1accbbc320cd570f77030b32b926c7b37a2bcdf16686240"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 207448, 384, 2, 32, 0, 3, 64, 0, 2, true, false, true, false, false, "50a06ebda90e7389ad4d39eb2105b22d812bce31a83819f42ed3e88045a454e2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "d77a3c14d4663474a3bc785b3cae85f70fe7991ff095b850662d891bdd39ed07"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "680a5c36759a49132f68376749067460388db3164effaaf832fdcfc590126a13"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "a2f547f339bdd871536290ca420fe646f42fdf19acaaf0ddb30c86d6722be5e3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "4d3e3007d5956f34d22f174b8a620c3fa9b7581503602518c219890a3adcb503"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "6aad62ef4077e0dd4fc831f42127285383c56e591eafccae67b44a42ed84bf12"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "bb19c87a523f6597dddab73e60101d93758bd8d53a2d9b4d2347d7fe996a72f5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "b7af7e8b95bf6cf748d028db4f9aaddcac74792f8ef1abcb8fb10a99f55b1303"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "fe5ecc66518300a16a35d7d9cd3f4bf57666eda0335eefaf6d4b32e4fcd09a97"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "17741f8589bc80c42c54605410c6631cc7d5e2b68abc6d742dd4b215be8276b1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 207528, 384, 2, 32, 0, 3, 64, 1, 0, true, false, true, false, false, "a83fcb1a02be97c212c8526b7f1a529442df33441cef1079a399786ef949b3d7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "51da3969c4ddf1ac18b882a22cda060768dfe2b0872eb177a63a9170d1850f64"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen", 207432, 384, 2, 32, 0, 3, 64, 0, 0, true, false, true, false, false, "3e444abb494015da64dd32f6dbce5486e4d432a4ddb3e9039cc1e0a9bb4ce6e0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "4295a6a5089222aa81a8b9e1d018535015dbdbb64102d6a77520846cfc02d5fe"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "89d656efe3e9410a5d9c91ece622e1f828b475f61cb12264be9c5b6c7ee7d5e8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "1343488083588c34a5329a3eae89f4c8809e4d68e43cccf045d31fc46f77b3d2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "86e40f520e6cfe21dc95b15f4b079fc3c5cf5d51eebacc1d57ece39ef0a8931d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "f970a0559b3c7b19928cdb44e535a43e52375657165d42f31432d5418508b52e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 207544, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "5225f6c7cce97e5d8020458ec1c03d29dc0fc11a77800152d214ecaf26167e4e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 207688, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "b2554f00fc62117e6f4c6f589e915e1698d56b6eff40bc6888cdae3bcde9e4c7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 207464, 384, 2, 64, 0, 3, 64, 0, 3, true, false, true, false, false, "61bca5d479404b844cf83721c0ab5523fcf582b5a8eb53cc073d7a6d059a75c4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "fdb21e42046e4f0a2b25710f561254877b4586dfadeafab7f1e483d9b2fe026e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 194744, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "59cb9bdc3762312918bb38615f31138e4ce30c36abf9d2c324dfc44027db867a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 194888, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "6721e3f993ffcd4ac201d6f117099344d18fbc310879cb7a0706dd0e5810aabb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 207448, 384, 2, 64, 0, 3, 64, 0, 2, true, false, true, false, false, "f77eb894389b331a6a5474ac8fc898148b310cfda8d25c1d94c17a139241c041"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "471e4c292c351aafabf7250f4b5c88b6354463e6e18a790d87387a1aeb78ef97"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "4ba1f4421373ae01ddce9baf6a8417cfc7c7647bc6a12bdfde010252cc4bf17c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "61e94417c0604dfa3e521ff66e691c75a0d9b1b58a75b445c920126b16e747b4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "d670c26a4fa2fde1f40750383072cb55e1351018b59f78185cf5d4c503c28483"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "c57ed45f9369000b5205f25abdf4a51ec6c9b29fb46e843e1bb65ff1c2818bb1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "b372f64076efd104b0fde4cb987f0d4b4dcd59a54205c721abbe676f3625e392"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "549042e2af971ba4d5350687bac2071584ad07ad04ba383566ce44c7786fd9e3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "0dd17feea29d0d3218f802d8baeab4e006e0a56d01cc717a9f57c5b7c4448907"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "4b3e5db2b285ede1397b518c9db20d474361c226b31c02164d8c351f64da6bec"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 207528, 384, 2, 64, 0, 3, 64, 1, 0, true, false, true, false, false, "94aa9f3d33e3d7d9fc04bb1bb96ac592fb1dac7a641573c9386b22213e20f90e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "16355aafdbf1c904da6e005054d11a228fbee35af2f24404175088faa2797ca7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen", 207432, 384, 2, 64, 0, 3, 64, 0, 0, true, false, true, false, false, "90b42c4ea6a5bd67c709741f9b1aa88b26ca2f192a0b38286c05ae98724e5d11"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "b99c86d36cda478e25962c1bf09e15c8df11ec87d53d5f125f720aca581654c1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "fd7c8d7c885c0d3b8d7196ddac250caf079f7010c2247470fcedd39f83981015"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "9792de4b9fff20b3c2a45424e94871a0e3f69bad66fffdb18f968df2d68f97ea"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "e6abd1ca24f56d66ad50fdd98c8a0faf361e2c8121a5da5262ec9f6833741037"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "5ae25239a81fd1b5d37f1fe789b1ec69bba19c12609ed7ab50e7d74b47a91e08"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 212648, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "f6e1a722d4a0a2f8d63e30fb7d5e6bc93c236f7314a4c10a5aa90b3c93180356"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 212840, 512, 2, 1, 0, 3, 64, 0, 3, true, false, true, true, false, "1ac09aa0a21ddd8f502ced214a56547cdbc9e89ec8de1afa06e799ccfb6380a2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 199976, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "93cee3bdd9dd5c4e2c0a50b9a724b16492ff34a0acf952a7c3aca9b609290010"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 212824, 512, 2, 1, 0, 3, 64, 0, 2, true, false, true, true, false, "442f88ea00600fd913533eb67220931db71e4087ee980bb0fb7061ccf6aad65f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "f7dcbdf1b8f796abdb04f7c3fe9a80b676fda16b3302b88ed039a490de1473c3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "73a97d14fab35a08c541dad2cb1e040591083ac74b99ec1177e6914af2fddb0f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 183552, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "bce8853ac660933a93dc10c46c264e1367d104756d61810ad9586c3c15157c65"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "a0c9628f718bebbac3011bb04421416edd1f31e77afad3b60007d24ecca04c8b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 212904, 512, 2, 1, 0, 3, 64, 1, 0, true, false, true, true, false, "65d081fa3e941f66e81c374f1df337b8c6ccd4366ca69f302f269d3dec013e6a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen", 212808, 512, 2, 1, 0, 3, 64, 0, 0, true, false, true, true, false, "8c2ad8cbf8059ee52ca63d3a921d20d1024c9f5a337a00d2ec226ea711f33a12"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 168832, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "2c3cce09b2cc4f2953d72331b9095979130d72f00153789bda2de962ea593d18"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "8d1ba9b1249c1858596bf266d2d17fcbdb0e6dbf1c2cae71a5eeb2c753f0e76e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 207288, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "cb2ff1058948fec703fd687ddb6ffd835df9e3fa2d82f1b7cbb9461075e7a8c0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 207432, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "cbb4dbae5df9981eea41f77006f1d1ef7621603c5ab5be6a7f4db9b4ff907433"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "b10cbef1915aa1d754f104b9b258f94d1c82badc270fc71b46cfd769bf56a1fc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 194488, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "b3a52d59fc8a88a4007dddc3e1c9be1ecf6d96131756071c4827e1868ccfd44b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 194632, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "8094fe91d49394b44ad0b107710d4b96c5db82fa4b0f67c772e9eda2adae2134"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "27b18fbc69bf3924389f60f8125b4cfc891e588bcc8c8efe2cb5ce9301f7124a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "2ab9bcc2a5ddc2845932078e5c2b610e1c0a39f66e64e6a0977b134caf2ab788"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "2ddeffa531921b8beb33d12b75e8a3525b14414436b3a89341764433561e1307"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "fbba15fc77ea767ec04eba2de2619e0ab247e805687c88a0801fc2b7ca69d3b3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "573516cd54a4b81bd7a66bae28414a9239b94c94cf6dcc3d6a05fa4833290249"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "2a489da8e7cbe5d11cec759625cac8754da79ff1264e9fd67ef5215bd1ccd725"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "1108d97f3b7bad558be845a810a17ff95796c72ffb7c0fd0e3df7767ecd0dd6a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "97f1f3e09c35faee8668a24f54890ebd5f78c2c2fa2e1667a9b311f48575bf74"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "2b2285283a17f344d63bf0c8e99ecc9ce94425ef813c1aac74d1304d5975e4b6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "853308d8416f85f7fac3d9f063f08aad18ae1a5223533f76820a52223a509412"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "77621345322246b824e256f7bf0d05b88fdcb39b5071ea1bd1205456ddb09312"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "a58fea5d4d4fcafad599376b65cb5c3ced71ce2aff2dd91594275128ef1d05c9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "a96441c6cea6097d9e4618614249bfe47bf62f1735f80115a5cc6d311efd68b5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "7ef66f32fcc1a6dcd9cab35b204214dddf92615651a280d4f38bd03d231c4fb8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "df03492838ae480c190bdcc63cb6930863b5b84ef2f1a38276542bcb66b90562"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 207288, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "bc0cce72b225cf82d2d7654b45e79a0de0ebd476b2e4d45c145ebefffe824a4c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 207432, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "bea07ca46f30bf4f245d3d7657ef85e9be4f47b44f3e0cdc0e4605f64b9ade52"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "e148f332017c9f5c9bb8dd153204a60807982c42e1064cb0e82cc2d05ed75e32"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 194488, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "ebc34fbe2682217cf2eba9f13224cb71577e03b0f41f7de733974747d787d70d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 194632, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "5d2e9d85ab65da5278c07fcb3b177313d73b9e51825d1d7668e83e68168dfe23"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "2ee78bfae07e3be4a3988ae679bc55a87086ef23516d9ef91ce8ed9d8f99194b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "435e900937999f0061af19fb9857dbc846c2af051fbef07e9cc58a0e0ee9328b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "12fe4e6327b2bc384e878d85b9c2cafaad1f023b90762c074e2b732f27942582"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "aa1803cf1aa8e186492d7b51fa0c789ead4bc493a0a642f751912d88e16aa9de"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "74b67bfe2610b591cf926b80334227f2c4d66d968a641c0fb9ccbb2ba7397094"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "f34ded43b4b76728ca7bc12aa22d99d7d1e05e2753603c6d68f5a74fd976c245"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "245273bb7b1ba4cfcf711b786ea296610b371cbcf3a61a3b208cbf3737aa7238"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "5d293381f24ed88c03c290db3b576521ad97c9ea1ce7a7499059985f594724e1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "c5c86e80dddda5b880244b4b77873b8bc4636e57760611857afa4a169b6e6dbb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "dc3233842c95b2eceb01b0ff471152b0dab450489c86b42601c1814d250ccd87"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "b606eff461191a0553797e61bf0ded472b8b512ce5c7b6ba5ea513de55c303f8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "8aeaaea6557a8b6e4ddd275f7a8f0c6fb2ac913421bad2ea3c0a85414826cdfc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "dd25567f44400ef82844e4e95c14d8a6cb236bc588ff808046d1a313a22405ab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "b6e6f9f850b478087a357be49d04b7bc3b1e91a3ba42ddadec775bb8f92954ff"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "925b73011dbb8c1382bd01e4f90624bf9bec93ff355d3f6e6d251561875fff45"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 212392, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "4b3d77d8e6bbb3c14b9e0afe3a2e65082e2e32fc9757125c80c0d291efd9cecf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 199720, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "e868e0238deb95be3414d174c879ad54dfb3dec01f7850b3a260fab72a4615f9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "dec4d79d383b856e5cc25e91579bccfb8094cae5b6da68c4280fae6b0846a2d0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "4b44da226a01bc525b4a0f362e3081a76ed436d1aec61bd21f9e538841cfd6a0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 183552, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "0ffacdc885112ec1fbd996abe5e35f89233dcac2fcc2b0be1eee0afbbcea53f8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "79a897218ea88325fc0906fbcab34f20eb4ac18d791e139d27dcde8e5743dfdd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 168832, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "6d18b420fda7ba76bf349e4eb7eb440bb7e5d88fc6d0208279c402d3bf66f768"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "926f317f3b7b1d86fc3ebf97678367ecc657be1a15e636c709e02b37282e0465"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "264e4edd5da85a2d5adc4220fc561a355623c8914131aff132b639fcfdbf2ae5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "7f6063750d35a5649ce29383b9a8b2959be57053e5c054e71704316649897318"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "c5cfc5a637bd299ff25fc360cdd8620392922dbac96bd0459d33e0910461d140"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "dfa7b83e8838010d6b8846be66dec8e5881cae78aade04b85c32b1846c1e3a8e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "cbd10c16e9d17cf261fddcc34f7fbf757adbb7e83d12c8216b9f9a6450d3e240"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "b1dc57e0949c72cea1915458867042df734589b96298840bce15186aa234cfa6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "41b886e6a3b330d70ca6c51ea2e342ca3069733e962c68a2d1084f1dc0b9a290"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "748460e6e2d13e0eec902b73d4afaba7b50853905321f943affda8525fe5d66a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "63670f1710decf7dcb9935e9c4ddf9611aa3d6a6fda4afcfd2caf81f71b2ecd9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "04547cdfb7f5c4bc6a4ab9f68950944bd0b4481a57bdced24f48954c913dd862"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "7579bacc81949b3dbdf56cff58a1c11992dd0c8de4eb30b234ecf9fce4d3523b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "03b9f2982be5bc8b4d12939c2b4fd53046a99495aa18d21a0a709f616d03d2c9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "9ff1ddd998eb45b648dcad6a38aaa7a722cc5c7df987bce8d14337d26ea5520d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "455d081221e225abae0085201b51251180d767f1649604c773029d0fc48dc9c5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "0f9864905b2cbd7c8464c9df23b9c6c7c03e1ca76b813f6d1c482e6d60fa555a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "6d9a478fdbcb1dbde571c4d6334ce1ade9490587d4dbfa14f9f6842410ff55f4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "71603e97b0fa595103fa288b12d7e6e31ab4616a07081b9b07fc6aff8d713197"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "015b7ce92cabb048020474e37f4491276e99dc1f1b4917a9eb4a65da91dbf0ea"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "136e362041ef672ad9bc78caff935595ddd88073de6190948979faa961f39803"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "3ef69976bcd7193d41b10195ba604ec8ba9d1a8928dedee6b64f087fc1192928"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "d1fda2ce355900d7a6d3bbd7e8ff01473d45b77c5dfca1bf0d1cdf780088905e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "e726cc58311a67b54d18b94c327f3d2aa8768f2cd1f53f4b404e316bb81ab7bb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "302d3b8d219dae56b5869359ec9f68af4cf72b7a746895cfd72d8d3fbb42f4da"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "82f9189f9841a68f369b78107cd1291a8d1dafc73a5754347ba144ea26919668"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "d9e9f791dfcb0c6496c09e29424c6e6767f0661ecd517f8a49cf515d7f8dba3c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "a5326dd73491a7e81e691f4a1a24be80991f49e66ab6562911c8381cc3c105fd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "499cf28f2f46c2ab38c128cf94d68212f4b15a36cfc1a0b2a67686560c296dd9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "7378e016665b685255c88c5618dd1532a0804285281e981a795baad3f2966446"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "e5b703b82f78438dd4f94e3e3a41b0d476b7aaed0d31b46b027ce96d54e19f9e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "083505e66c6739575d272965a80c5a5ef86af2a212ab695ed0c643a14ff12f3e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "595c3f9acfca3af08cab7c3a00d6b7c0c1bb1728dbc0682706cebe1e5e95ce64"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "bdfab5f8ef257fb87f227a6e9434b563e6181d151f7562f367639f248de648ca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "dae6f49ac70fe40e94ef7c60a794130f3dace1d232cb39689c42766e8c80f851"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "70ba27313a6fdd25cb03cddf659564bc2b57b814d402f7b4d16fc42214f115b9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "39449b20f9d89c955c7a5e8ecc68e07f732f947c645be9798d63f3836bf31028"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "d8950e7461f1bbffd638170bf33264191a31cbf3cb3b05eea2b7b55bb05a7f9c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "f628a32fba619706b2d4754e4cbf0ba801093697836a556e82a79e0151c5b31e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "17c4765146946e5fde3ef552602d45c70968f525c4e3e14a4313cfc765fd2ff1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "850ecfe632e2b0cccf4e350d4351d64fb082ca45555d7202bafafe2dd4f83d2e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "48c045e66283554b3c2bc641e5890f89a3b2127efed94aef6c9027bb4a98d1aa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "7504155b80b1536582f0d6b058d10bd4c2b2dce2fb3f57756dde1d3912e6a771"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "532f9dac66151131899b09f48546f35fcf578f5c160536f74d09ee70dbef0b23"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "080e026152801c6fe4c9091a3d6bc66edcb49f089bfffcac8d1f9c2944a5ecb8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "60bdc36b0b2b451816ae7ed5e41bb96e595a1df5929556c5cd1eae3a2419ed9c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "99146cdb7468701f41b13941a3b36b811cdb089d056641b0df984bb9a3b0c25f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "db6cef6c470d2b41adf58043bf3874a749c376b90377fdc468597d846b024058"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "6894ce245e78bd22288fe6e084a7264b4bc441ec5b219f1bad83152a64564f24"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "34af0db9f0b83bb2ac2ec589b1cd4c4be8e35b6bfecb025b16f1fc832ab42204"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "415b447263fc6fc1f69b56afe2d34a55c6b86a3562e05a9a8672988d1182d6d0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "2bfa51bf35f058890d3639993d266aa3d98c7599b94038f96acd315bf04f21c4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "3fa79d3a035b9005b69f4b9cea9dbae5410c9989a91a4ad68b4435054c7827c7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "d273e41116842141207f17bbdc4f8cfbfeedad7113fb3963bd97d4f0bddda79a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "cfb0081cddc38e016754fc7b7402528da4ee0cd98a224995b8d2f520fe576d1b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "f631878f1ad9bd3d650b8665cf2acb31eca75b8536f8639e4235a38f541eb220"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "f4f851ece42ab157522fb1da9189778b4a0dbd93e0356384248910f406a79bd2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "d5ca1fe24f9de4c69deb2eca4de999f3881a84adda47be0251b52ac78377005b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "d094707531e7706c1f798252449b0d48f336cb56e02683758d886b20a004000f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "1a9214656f5324e39cbc33703f6317f0e43131a066cec38d6dc2feda1877cad2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "2d7d1fc344ad639252fe2f1f002bc65f40ea02ecd85204693e1221c5524e15ef"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "0dd4047e75c3ce331e68370241c6f9af9f188ff6d240412be68ddb4a4c5dd77c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "842369177cb1e106b7f77a9c70fc43c1a9c3169c64c7fb0d1a806f35d217405a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "722576ad37ddd395770a4af6bb97355d98025bbbfbd0e3aa08cf0000c397e3a5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "782a85f81de35cd97f15aa10eee0deeca5242f57254258c34ab97f3c054ef45e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "6f13e09cf23c536c047345eee7de1e67c530614007663bfd2f2b17eddac4a488"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "754c23b88b0d4c530eda1dff96db1efeba6d4904941a8088464fbffa20babf92"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "916fd06f9b0fecdc49f2cac80ed050c4d03d4cefbf55cfa55016cb41edac1b02"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "39d61d5b95c1cf54217cba925f66bf7a8f29520d52a179d756957f4681c71b0a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "16319b494cc15a292e05513f9a7f644742f44237c4a9f7202eff11059839cffd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "2968b7b626889e4118784eaea191a11b0deb494ded78335e43fc595c57cf4b2d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "f38f7a6d0445b148ea6a3e968da3d1065a21348203e051502d462989c69054b0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "d8f2b7323a4a0834832c5d1c8395863075724cd8961158ac08277c1565ba6265"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "c620e8bd7c10501634349861aa923f5e45c75108058457720a24b2a76f576409"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "051ceaae65c9c737814bbd4133c2892eecc05a15654a8f4379b330a35b4b4f18"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "67c0ff619ffaea5d4831b2b2e3bd08c1458d40b720679bf6576fad15233c91cc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "c6e63e1986f47c2d96a17b8b2c7a6eb08c478108db94e0b4af3831bca11e94e8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "7060e41ce3685feb80d11ec18a7b3bd128ca55aa1f86b1e38961b6e6dfc29e43"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "833253dbf9629ac0777ccf76027931ecda9688efd197a739b8e45707241ba913"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "29e8ea4ba8e95b7eda015c8322b79450d988ff07991282dd619b50310a79f1f8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "cbd1aaaa4dd3ff6c0ef13bc789ba8ab25735bf2311dae70202a792a910bec88f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "3c797a3c1f361523c9653f6a8714d595e668a0ff326480a35645d73365f1db32"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "fa399c4ea4371bf0bed152bbaa748c73f56e941327001df9725f553c86d053c6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "322de11845a3ebbadcc984e84f75586b0970ad0ea17ff35f4b45da89bc98a654"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "86ca37b33921c457875fe18618a79cc0fd48fff674212b55db17123ab1a6e166"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "ccaaf4be49d4d3a8800a1baa3af1296dd96f246bde8bdeb88f2bdda1622fa6af"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "b4ef10e8bc96df3c31498a30f8a18bfc7ed5397ec8568b69028528e8f2aba30c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "35cb97a893ffbc42113d242926ececf4cb748725078d55be491d3c6ac1152841"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "8157d7925b396e2b072ec5ad25ba3e01e83c37b5adb6cd66899253d9f4f44286"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "40c81e04b8cb390dc367415c4f99332b529ea60b529e78b0b4b4b9626a398581"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "6a7b80b7c8dc48e898b930691f40b4f63ebbfe04f19c4a7135f167bf6b0cf23d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "b8f233a62371b1c072b09adee06e0230fe7adc0d4785898482ecb066e413d65c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "dded397a9d98b2a98c0865c237c6f20fba6daf55edc791ab040ef59537a1f4ca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "7c179a6c4ab94944033c6a79ce6883ab0c9d76cf2f5a0b8646479ae1a997868d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "63d71344c45302c5fa175aee52f4faf7c87d6294d27429a4fffe6c274e4a4927"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "67877d2085bcbdfafeb73af75d13ae5a58bcc9d23c48eac9a9b80774428251f3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "641c8443b17c38bc60e975a4385355e0bd9cbdaa0c59866984bc7ea81aa8986b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "6bf837319be2f423e5c57bbb6ee90bbd2bfa3aeb557a4fc0c862335083f9b0fb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "9af971386efb065d9af42fa5ce1ff426830a7ad2948c40599e73e1c7230fb88d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "729a8043c536e7e1f8e43767574b4d022216c0874f23b34a2aa7d0b480b37f80"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "58d58b50c3b04eafb351d4a7ecba616b4da5acd6f07da0bde8b854fbd78f2850"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "dd1f6567c5e63fde67646aeefaff846f066f512bc2390fb14c4469cd7f74a919"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "f57400c5f6636abd742f02593698cda42d9ddbce26da0f6c2786379d48fe4a0f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "588dd2d555ccdf93c8c157e3974f44afb5ed418b261c111191d8fac01a0ad3c3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "26db838aebafdbfef502f74f390798aaf953098d7afc320b76cdfd30ae9045d1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "af6d6fcf62927fcc121071d469896be889b1fa1ac2620654d13983dcc44c0798"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "b222edfba3c6fe6ea598c577c5cf7a8c3e4e8f5806e186f9b78c3d2b12d008a7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "2c52bbb076c217278827f137b9d376ee8c0f818d9a22cdd59ed1004449571e23"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "c995df1ea3508e64fb8cb8b4258792531c44f333f4baf50c7c4066a8873c545c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "eabe1490f4a452da91141d2278257136655fe30613375dbebdb0db63c7be1a3b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "0ef862863085cf63c8c58b18dc9f979318bc07b264e479ce2291bb083da2e892"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "c275fe4b59c2605ecd57b55dbddf2b95cb84bffcfd04aa64604d3c1ffed54b78"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "34ce5e8a065eadcb89fe404619569531c2c45774fb3252dbedb6ed961b74b661"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "8c95ed37c15968b94ce5431f5f39cf30f9863636b73f9db5588534a53419e715"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "48f910453f64ee382f4b0c1aa25dd3526b98d71c11755a0ead84110c09062781"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "0ab7eb918801af66f5bfe70047529df6aab1eecc950636881330baafa571172a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "7476fd7061180decf86b378e1fb19b9b13f2081744596538aee3dc0cc9f7fcf1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "9295fdfada1519de309ac6a47238e792362af48f52c2086f3775bf38f8b837ee"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "a0eae62dc5846902c1d3de9e1ab227a99855942b9cdad7552d2122539d70135c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "d563e47cba439e69cc2012d80ff2c8b09061c18260eaeef04007c359dabb58c5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "3a691cbb93646433dc25b07052ad0b04e10825d93ea928d35bbbcd81aade438c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "fd510089e930f228f16dd8daacfea3a2b392c37d875070514b888ef317555112"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "a2f238b350b352671a9e46d19f880e8f9fcd0e8761320ae4b912f82bf6f6599f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "bd466e81a5ca2fb2b712e75d44ebcce71bdef386850f6120e591a9723d028890"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "9af2b5e981e3373ddf40347dbddf518209de60bd9213a5e1a58c8882e3ecf499"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "61850d30d21cf499f9017202ed2bf896ea007be53c4d5088a8ed146d21e75b52"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "f3cfc58526bb0073c5f7f6c77371c5ef7c988f38edeb1554a7898b19b7154cab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "0ce8653cb4e704708381d2f8bcc5dff991db9827616063099d915d4d5f3c492e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "85d73bc59098634a0acc5ec5f2d7113a15b47c5e1a0b20f075d7f6253a72529f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "f20345498ab4bbef7be2e2315c99898608f0879fd6038a352db7cfa21f8e460a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "4e2c649eeff94524e166f4708bb7121a3fdf4768de28e269221a9b59032afa7b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "0b5e3e7adb0c20e2481b8f0ff63019ad3f7bfb2179981987c52c904ff6ae31b0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "d0ed03f25e5b84f76881aef5d9c99cefd2af106553eaad9a9fbe58877189b47d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "b48afe159d4183f49cdf30960f195f6e87a6c7f8c5ee1aa2ce037a1909dd691e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "4a6dd440628763e8bf226c730e5b0ba9495ae496c01ee34dd00a14704bc72148"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "017dfc8afe7d404d15f4edf53f93754fce5078a78f9948840c9ee551407b2e06"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "88bbf4466c5abaa066f0eea704137184a32cb82d982e4d8630f0e7f69fdff90b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "2a7e4787541a86d9f7e1219fadae16c9c3c9e2fae8f5cfa06e93ab9bd590044e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "f6eb62efcda14b65f3c2a212b0124f6bca482f787377a3a6c26be2a96e07b6fc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "35ba09c9bf05ae509a0527b29ec65ae5c9011eca3829e4de21ca71c9be1ea119"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "b55853b5858717b306df3cd15537a587cbb62b8f5611577a0d453a967b55eec1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "45a98f4d34207f8baca6bdd0c023dab502910690ee7678d47ddf90a6d1f9cb59"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "bbdd03bc4b74f13f4bd9382e930b90b8ba21485bf562c846d984200dbc47f3e7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "879bbb309596daf78d951c401b667803746b0d05bab15856a74b0a80fa251689"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "63c679de3c8056c50ee24491f9ea9a1b248c38f2a21c2e64d49e496664e87a40"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "530aa1e11e95bea6dc027987ca8e229463ad8a7e10fc4327ed0f8c0257d7396d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "abaf03b49f34a4922a4c6f8e09979bcbdf090c4cba0ff3ca599538b6d2d14458"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "b0ffd46ebc60decd7b1f16691062833202479e93d50a0aeb9d3b6332930320a0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "8a227523018cf0b1e9854b68ccf724be35b95227f339f904791f400466cc604a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "30018ab00519382e4377a4b819c30cd14e6acf7060386b211511ea133a722264"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "d70e607f9bdc6fced7599a047ff3b5879b081ca6662f146613c99f7d85061e0a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "133373d3eaccdf2a6b709a2496726fb3e529d0fb74147890ce6dc115787be0c6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "6f9c780a067a657769a73232f7a404005ee000d502536825bdcec573782506de"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "55542969a1ad53384e765201e29be81ab8c7edd65b899f2cf2c3472c0a570c4b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "b4fb967e70c96056aee8bad66de0bd4604d048d380fe3632058e531980c259ab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "ce4766378d2f799d20b13a32f4409ffcadde2ef77f916b77e51865ab299d06ea"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "464086d9ddf6b46daecb7335263144198b55c62e69190b763f5acc018a5a3d61"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "a0dd2b803b593f1cada6c47725631af4434663f59e0cd6229c93086a6373a6bf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "a0c94dfed6112bbb4b318edddf60cd25bafb0341b9a10532e0ff351e6681605c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "8c16791c8e79275108e5f5bbba20709cd21a878dd8dcf1b33401864d909bcf1e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "1d3a3ffe5351037a85bd0ccd9a23e1f04ccc88bcf0a7666c2905c3325e45f4e2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "07786bd725cf8ffa984e43441fcfe6cdc0b993eba65ff47840243c1731dce953"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "e17dc49d337cc8051300c4323f6924811d1072205993b146c9007e8140b0aa7c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228424, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "e4b10d6c3347f1d3d45b90b541bd9e5f16c646d60fc3abcaa96c14eb5895f8cb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "8a94213bb716839266e59ab77f241956b8e7b4bfd9430d786630b0b9db9f68d0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "568909bac130e07b8a2e3ef2a1ff4e7830a203ae08ba8adf7d392e0279e9c5ae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "ddb55b921ab4a495a10303ba652dbe5a50c74a8f913a1389b57129da3cdc8d72"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "bf934050e04a19c9eada96927fb3c0a50e6e33326f0564cc3061eecef7af4225"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "146235b1c690f80ef424a612cbd686b61fadfbb6d53ac9bb50dbb0f17cf83c4b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "7cdb7d4b92482db228cf6fb16b72478958e617b3b99e10335455d0bac4e1c598"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "62e9825bd87391d72ec5369dac0bc9a5103ca8e6c5141ca9a86fdc4e09258d05"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228424, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "bcc339a3224740a40fe6e4ed2b5d372988f830cb8adcf624b004deff85f42121"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "339d9c5e68dd3fcdb9974178151dae9104dfa2933e27f2ba08f2344df9f72d0c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "a1a7f3f83abdaba4e70e9ffc751401166073f3029c63cbb85faa9ee51804e55a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "99eec5841b5314476333eb3b02db19a66c59daa036661614936e605aa992b6a6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "e9451bf83877cb290c6d73f147cb3e58a8be5b81062d1e8378ba0d0e460d9474"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "848501c769982f5cd60f4e01715a9818d41cba09edb8f18b29ab9852b9e13085"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "d800374923434b191eade1515f16af9973fc8f4244f26dc3bd398372487825f0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "300bde9929ae69c80bf9832cacb687875a8764086b593a3d39ef83155d9ec7bd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "227e0b7dac4390c8120a8cdda3f5000f6f7d7b24ff3e0b80d031d763469574d0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "a1e7629e8d5571919cc90d03a03616e6706536ac81f0851cdec9ae16db64e093"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "6acb73afe92b6c080ea06c85b9c052ce98f6bc8ca06e8668e0e9885a901b9ebd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "c66a9d620048b6daebc4030bc291e7141f49c9fb7616a5295ad3f33e2a0e1818"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "a9dcd3035c6009adfa85a833d5dbb741c09da39fc832f50c5b706067b07bf5f2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "5fb6c058f28a496f6268cc19113b0c6898e971bd645f39a7443000f2578bd3a5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "f8bacfcd9ed278ce6c0eae41d7c0c74158c2a085fb13b9752c414b5d5f9eb635"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 160016, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "9d4cf563963312bef214a64ba0a57d87c3011c2c7326492335232516c1661aa7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "b59ff312df03e73ca7763b83017cf828cb3757d965c8b935b17f325e86e6d308"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 154384, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "e43c4a1fb9349b34f3751247d3ea39bbf0a4a76a6114c566f99645bb68a4ae9a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "3d03293d12b7fbbbc73fda3cc8fa73eae90f7877bbf0cfdd88065edc8b366e32"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 161136, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "71688a00d9b3f02988d036d0287abc7b50e8f0dfea5d06ce8b8364f6d5529789"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "f53596b9e6f7bcf049cf017f10f376514cbee7fd341ac1ab2e3f46041828b31a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 155376, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "2aa95e733aeb10f0af2bd0f1f49a6450e5e9bd25c0400cd6c875ee29c54ebbe9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "f4a8d45a6fa37cfb2f9025708598bce8a54a487ad233cbccad837de120580c6a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "52c48614404c5745eb29296dab96465f3df1def2a506e28c80bfd546b7dbb765"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "cf560467354e34663b21ff92070852fc3ca9c9271ccf30fc3380a3fa2e73ad43"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "cd34a89f81547de95a95ec07c1fc204c5405c1cfcf021074dc64fcf2238fd532"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "fc2ef26e268036305e83eb5442f697539230065eea1365e9594041251a525770"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "860e7ddf3fb7061dc0ffa52408d62d94eb1a2cc7fdbf90cd63f43fb5222b7d38"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "b030ecb879b5f38641757a0d04f372d2f941e7a42c91cfd28262d026c3ba396e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "42aa749d851c140f2da84a807456eb8b313e1a7b3a1f49c2fd48c823d787b04e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "7578e091bbecb1e9e679775edfaef3ecb52fbdf8230f55ea5807e5cf8053f318"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 160016, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "824db1ecaa35dc4773c5ff4ca9f13711440dd132033ad79d77a134b81be7606d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "fe955a81bbd52148d13e400fd4276feff24de78cb4d0ddbd5fd65efb1b77663d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 154384, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "6a5959dc9fbdb5c86b9857393b1396a8d42e10887abc7b96d68e02336b664d4a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "38e46b33c6518d0d39371c9aeb84bcf5fcab0e916810570c3e83b6df3718a34a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 161136, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "6db9e35323c185b11106da648039be2be00e4bd6062038691dbca300512e3b62"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "99cc55984f558bb612fc985d967cb83c8a64b8d51ea483a2d006790919257124"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 155376, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "541b6417bdd4536bd9fad128ab403885a0afeda7bb7e5d973ed541ae072c84a9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "84aab001f7ffa17558d4f9a88540812010113931f226a626cbabd43118c26c23"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "14f16d7b9f1efddfd70ec163703bb45625526b8ff61ca47495e7f2f3c77ee106"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "cbc7a4370ffe040c8fd1bc8403876e228b5cf53b4f030b7044dae12f87ae88ca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "fa316a5260d2bcc238e3c4b8faefaba00aebf322151801fdac13b175e524afc2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "d16fc5173de9342901f9879938d48c16d4729c3c20e43cf76618ab9c4f6a39e1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "df9c2a3e049947e9810b2f1ab1fafe19b42c99f6585836a91e7b6663f87f56cc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "bc6cf08795ea562d48a8c9b8c60eeb43b55c65efb8c6c36debe6691098e7b66f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "9ee9f5d2f6baedc7654ab78766a83b9b09775c8ea07260886245ab375a23841c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "49ff06c9520a6fb3e6ce07a232f0ccc8d5aa72c991ee96076205e84e8c4d72c3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 160016, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "8a0cbc7d632d231f732595fced85c66675c81fd5a5e890eeb44793d983860f1d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "9fce9259ce0d062d8d7ccd56243efdec2eca08185024ee283b6b99ed17a7a5c7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 154384, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "a0199aa11009af78e9f6309c5bb58aafe42539620ce22dd252fef3bf38c056e3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "c53ce582f6d4a13900686add9dddc7bfee0184386625b8b2c417cb78c38b881c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 161136, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "819bc05168c13e5eb23e93c124b7959c53c580b8a15aa4a56399ccc445781f48"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "0c86053a0a36bb105638aeaf37fb7dfa5af20e462846503fe0bc2fb53e3990da"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 155376, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "9d528d42784b011d4ba3122bac29af56fa8a05eaff68b6b3892805b7355b0ef7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "05c3771136db8a9ba9c6bc11a61616daeb521c85a7e3bbaf32d0a74c25fa0168"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "8beaf63c3f6324658c8e3673a06a648429f754501fc588a86393dfabc57cdca3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "09d1b8b359a9cb5120dcd73ca0559fcfc8ac0fa236bbcc416531f0a699ad7e4e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "6595e414f7fb37ca385c3c66e1033234431542be1a2179f119a4505475823320"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "8ec39d2d90330bffc69e17ecc0e5a7a670dc3e2abd6ef14ac33ab25ba6fe342d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "5af773853e60b1158fb240f77b95a9c66ba403c70f4111f86cca4d4ad53efbea"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "aa6a7e2bd9d10e45631eb28cacd0922bc8a4f4d304eaef7c494e86564772ff1f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "336c4d8ad6760c98612906efe073544b5e49a4aeeeae05a02de3d88a5a3d09ab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "456ec40aff6f8557f612f62d6ea43f7faa0ea47fa6848b5b32ded3ae4ce9f5de"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 160016, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "a6deac82b3e78681bd4416234f4b0231b17d139c51fc574492d101d83df62272"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "8d4c9a2ca27746256180f6056ec5e94d6087f1c6d2bc556524cb3a42fc0d1234"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 154384, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "1976c833dbc0200b75cc2a63f67813487a046f53e31727cc7d8c53520be96d63"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "0b3740c43f3ee26ffb8012a2f33b1a7c3e028e0262d29111e355193fc81b845b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 161136, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "f126474fb648f002b9dd8c1c1eb98cd47467c895efba354f1c318ef5e2dcde75"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "c0ac0e5b02aa59ccd194bdd8b4a95960610039c2c3e0268064af4ef82c39e614"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 155376, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "b20af2cfacc1d386e99c0f8b648f867daca5fd2852ec83183c2954bd48d46a71"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "2512fe69a5514db440025972b6f660f75d7efcb188a567951a8206303909bdc6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "d2e1661965ac8bdedb3c126733478982ad5166ce31036b763c5548aedf662179"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "cac118f767e8596fb178b0a3237fc30847a41befd6bfac5328b6859bc07806af"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "0c65d01531043a637bfeca6199f8ef03c3e350108a5ecca6d99fd42a38ce2983"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "9f8bc430fad042c1d685fe3279a5fb8d436aa808317b85b80f03639916a2eeb5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "0fcead2f4d22111a85fc2a3cf48187b6aa600b93ac94f20f6b5f881514ef6af2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "5c6b2c32920dcc899d2fff67c69bad4580dd4e2a6c4ee78ebbf6f8a098c87332"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "dfc95d8b5c7c0b5e7263c42c5c89a91f58a608a30faf1785e431f798afaf2a73"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "963a94a79474d3ad317ab5ba2f8e27dc1e6e593d400d43b0e779b308b80c39d8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "919321638f8049490c2388f092df3452dcaddb25819a1cbd8c451d9a3167bb52"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "d3759ffa5f56a79a9b44f594525a6ffab67667f37866d0fb152d25bd3fdbc108"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "46785c1d09b1b291c07435494525dfb82a8cb7a75dcb44f26440638a493a250e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "a7fc9458105332bf1fc2b2cdca190aa276e16a0a39d80ac72f9ac1c88dd7eb66"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "2923879c916af27350ff40b207d59e4246d59ab55f7d830e029c0ef697354ec2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "0e32b5411c33ded3f138e67135db309c5a14bd034fe957f3be0a9703983bfcb2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "04fb6b014f0f21c7b99cbff5cf37369a4d927015d78584bc7249623c559c2b16"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "be5ecd8e4c39cac02f0cb0bfcf662a0d221edef7dd4361b9abd308422c76e828"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "bf9236b7c186f8fff1c1a58964f01f2001f5e14f1959db1f4521f583ef557d6b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "d33ecd47560548ea9d1f00d0d985e0c38dcefc34391d9fdb9c36b4e6241d55b6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "9628522a94fdeb2ea8492ecf2491903b39df13252c67c94c8d0644f3e4f82f66"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "bcb678011aa8fe7250aa53f7948cdc6ba2e8fb34489ad1a2ca10c1b5789286d7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "cb4790a8d297f217237483c70ebb6ae56d96df221ac71868efd053a1c8287087"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "0a1cc339b4dec459fe9df6da7aed43aabcdfdb42143ba25f7ae19450d1d7db7f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "3f2161d5628463bc2ada82137eae06f8335b24f4220fae2436da44b8176528e3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "a2d7ff3ea979977c31ef4fbea6f85ae911edcfcd33a1b5bfca4308fb71374471"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "c586badaf6eaa6874e52887b71d5a2e346b8946732c4f3ab7c152052b466f0af"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "90e077d3bb07e79673ed19ef1d80a0f5b759337e3b6f3e8796137100081cbd48"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "83cbbfe3c3e04139ffff1ec255d8e6236cdea2e8c852eeb88625bbbd8838c6d2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "9b8030bbbbf6bdbc279a83592ab8360bab3c597b3da57e4e89d0ed40b2363488"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "c78014ed3e80ccf236cf976107e668b3c05b8d9930240aa31f1f37db636f6a78"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "09ca20f03d953b50b4f86f5732a77dcc0ea2e82593d48e6f0e5ce608dc7cfd27"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "19491525842b40405d01a3e646071405a10abfa0d4c05356d0fbb173b8ef8c15"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "a35c6d053ac8e682629ee451d04b8b6dce2bc3306b3637ac51af4254b4843bb3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "aa9e5f5fbd7163003884be2119ecd46630ab59f0886569b2cc2614ec8658e731"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 191832, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "a05d2e215213bc571a57d7de8507efa6bcc8bc6d55d3e26afcc9bc57417af480"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "5e4fa2ba0c0b82397394ad7c3ef9bbcbb56ef9aadf241092c6fac32e59eae923"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 157008, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "4d88bd2bbd3e1e879273cea917d985fcb3fc60ea9eecb7342e49583453299527"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "2c7f33d24252981b1c222dc65c5efe347d3a1965de146f99211e05073f977ae0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "5ee61950dd69caaee11cfdbb7ad5ba33faa892e5b6ad8e84f1a85dd3a07231a7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 165296, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "aa44720bfdd404117b780d44e0d6df8967ae2d8fe5fea05fdb6363bc17d4d7af"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "f9024073341f8af168e7dd288c02b919f4bc7379955d004d3dd1b7934769756d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "e3e909110c6cb6f5c682804c263e2bface0b81f546ea231045485ac1dabb739f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 191832, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "b78f20443cba164d7b1f47b8ee6fd799232d5a798655f6d90d2736a6e5bb78d9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "35c6ce4e28fd707b223c886ac27bc07e51618242cc466b0478b7980c9c601f4c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 157008, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "d5e0e162d4ab010ba97e7af8f5e794e8e2c0f92322ea35a0bac738dba1877a78"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "259a9c633b26dd7facdff2962a2e1a2c6d254fad0f30e85b5bb8955b613156bf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "421e4e65d9a4b30f3c69b946a4e0a0aad7817d93c3520db08e4e8241e6ffc767"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 165296, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "c19fce5c3d84833c4737bc217c70a275aa17a919377b672f88c1792c2e0c31d7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "641f3acccde2df7cd20e31996a7c173185445cec8ffe2ff66c250738a6ab1aee"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "3c5ba679181003621aa2ef41e21f6dccd81feca66d8aba7c303d1c140df84a98"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "364c4aa6959c4e547f2b0e8187aee92a9127013ef58b9e052ec85420aed4c33b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "27ecd0ade10ad5cebb4019193ade5c9bca0c50377530be0032db6ba566b497d2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "32e191a02964b1815a87eb3dec5e75871773f510156f4af5c62f15d1e0a9b70a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "52fb7bad25c9f72033c3b2727eb9a80b241e23a21a1752dea1dd65d6428ac144"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "04b88bb3699a8ad210fa5651d55b8d7c5490a976574e21937d5d3cb83d86c87e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "806496a666b6d15fcfffa1809f4f58e133452ddbc1d93a162398041dc86ae6fa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "7f945df87a3b8b3de414cd15a40e6ab209299afd2e1109f37e088603264416cd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 157088, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "83af1143621832b0d32a83a2398e4776e18b8255d9ac02957610459e0d0fbe19"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "e649dacd019a7c32d916e1e5da6190259c82f70907134c9716a7c95972fa7513"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 152992, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "11b3302d3dd336ff1bc24ed583bf665e48dc8bea12bffde20cb93a56fcc0122f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "85ffc88539007d12ddf305a0735f22da35965cf66d6ad89c04457dc9f2310955"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 158208, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "d683ddf22fde827332f823d05d1f02ce77110aa203bbdb5bef3dd2a05d716ad1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "7e7481962c5eb5438f256297484e901ce22f670b5d837cdbff2160d63f67febc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 153984, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "6557d306a4a238f6f4c0cec4159003f4440415eb2c485c65212d4f07a2632250"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "443cc7b4377dbf4061c31f35e916f13e20410494fccc7cd6115ea53f2e15012c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "06a384a32870e224aea27e98a5f3cb7adbef2ffaaf8b25dc05af5c06c5a941d2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "be37951277e71094f05c7da4371882fb5e1e2ecad384fa49557d475092b1ca66"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "dbd6cc2d390ef012916eefd622e026ea26de7c8247e2157bb4ff9b4bba42eb96"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "b315cea045efd2d9b0fa51b09c206aa621afa0de7cfa898b1e151795de27ae26"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "3a060c3393229f3da446b32812fd17f89b95feb27f2ec4b5af7ffc71520104d9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "def2f9071c248c7241ef1298aa497210e6c08c8c6bfba5ab6daaa2bbaf20bdaf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "1ff77f28617d7a1e52ba2d8d89fe404c375775c90783bf905081573b81d68836"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "cfe1a08fe45ce784c08a218811c0642d0e7e2d2ecd20c0ece02abf9e884d0a53"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 157088, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "999ea41ba80768dc9591460dc24d8482679e7d83bd46940fcf693d6017b152dd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "0aa2912ec435b17a21726a9d388f565f89a696b040c67b66534b7f1023bff76a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 152992, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "689d5391a4dafeb50cacd92f54f45afa9ca28f70bcb24eb3e585275c30da31d3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "e46f732c25c5a43a6c7f87abfdf8fcc5b8f3bfd58b08a65b87e44c27ed4a61c6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 158208, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "0cdb1208cc154a677231a2bf793dc3cc07baf57a651d20e8891422e28ea69445"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "a15a0a5975bde0252b1aebe2274844095ccf71cab25baa754681f75038891d5d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 153984, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "16bc46a48a82053393871e84f0998e7e166637ac428b944fe8bd86d1f82c7043"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "65623636bbd7e2aa3e9f749a28e42b17f8f6a943cac4e3c9d30a3de369906510"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "371e18c8aca6021819ed94dcff5a6dc9dc6d33961e21b6d833b0b42ed7009354"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "bf8bd8228b25bcf2d922cd992be22de71aaf50165c2e0af557c15ed02d538d10"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "cc16db0753b4571a474691b3d4b1800068e5a7145c0f6549570a242c53780932"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "71743778811a905995d6486059421f088374a338f13815aed30948e44fdd5594"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "837b60fbc3e68782bea796cd3fdeb5385218a3f13bf9495c92abbbc0505933db"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "ae862ac45b11eec27d0af16d088c350d3ad50f8e822b69fd792d3af466e51fbf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "eb41d5f8eb9c3e2f82c91f83ddd12a4cd490e21487e40d17c43addc8f276e9a3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "4b1e761ffbc32dade02f8180de82cc04a2cd76ecc81992a6a64df29a8799e3e0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 157088, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "5020564ea5b9b041cf32fae755fb7113112ba4774df1e1cfe29e5be453e3cb75"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "588a37c73d04e70ee8019a8331db695bdde55ad8b0a02f6c6604842754f30bc6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 152992, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "2d081e189aa50932afd80f60acf960195026c41443a271923f9ec7b1de400286"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "fa6758557ed7184af3a88cca521300f9fb3520b4c3803b8360bbe4c2c27fa5ac"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 158208, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "48d4083ce94552d2473146870a257995da1256fa5d8e0d92b8a34844fcef7e84"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "d9517b4db1725e9f30c3c144b36ae071847a301423d10aac8b347b9eaca2e6fa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 153984, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "2133550ee01623887526ca06020a167343fc64aef7461afb4a8be4df48a06e44"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "931ffeff4408dbe696080027d05150ea0ed713e71902975ede304af621629122"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "3f926164bef219adcdbfa6d4070d437798104edc13fa59588d70672c4728116e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "74b3247026006edc2fdceb986ba4a7818f218d0e13af1a48e2d9f3677bfcf584"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "48cd94ffdd32a8a0726702dfc3425ee3ff291db55923d24ecf12bccf3549738c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "ec4373d7ac894aba321d70a489c5b6df7c143c03efcc76ff91c0e5562b50fb24"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "91c842bcb277b4a2bd2df6ef51be5522f70bbb5b0e82cd2f9844033768cb7911"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "9fe1e58c6fc2aa017a4acc6a1415d586e7d4a221df637df664d964fdd326022c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "8bf7ba691a9da5cff42a702b5c9f5320d8fafca9c9d145a6b8aedd4a10360417"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "f0d21602ba64de249c6b7ef6df1df9ce0e06f08469e001696769fd8d8dc5bb48"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 157088, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "71c853d4d4f2710aabab9a4be873fd9e403777997f834e447afbc276d3f10a5c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "28ca03a178940af3641e2c465f89f7ab164f8747700572d5bd1dc726599c8a03"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 152992, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "4f2a5bc5f372e0718d9abd87364a06938677d15d53db875876095ccd0816c987"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "e7d327d050d21d9c923e20034a4122161bc687327a025b58620d09f489215cdb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 158208, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "4b7fe59fe68f56b62417c5f97daee0167fa41325c1d5bd541f3baf5b1244d8ba"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "9cdba1d9187ff2ecf5eaa2a97588fb6c65e9de3cccd7eec669677dfebac28afe"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 153984, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "e0fa9b5df103b816bbb409739e67daf91032dc92d3db5601b335b254f8d7b613"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "ecdb85ae43bfc29d151c3314ea0c38b1cee3f983042e9d157097a722c33e2920"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "2ae2d0ddd3857d3d74288be0504000bcfc1bf9361ea85e022c1448f9963b67ce"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228424, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "ca2e696b7eba254c77d2aeefe088a7d745743283aeefffe3d27c3503f5ef26b9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "5370b562bbaf3c99ea2c5a60cfda7c8e909d3c470a7ecec335e173056f42e5d8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "fbeb7cebf1bf8655f13690be3943bffe0d04c765673715b66f790588aee48416"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "7f27e34a6a8fabd97645b809aceb7259ba01d3026e85d964f36fba54ae76f2e7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "d0346ae9fd34e34e3d253dd65b8d48a45db7f6854bfbea6dd6a204ddb5d9ed6e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "fa25427f20446820abdd39f05f2980cc381a73eca7a03657f3fc898b9e1c6ed5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "bdf12f7ddcb9034ea0deb062e80b82bf0590b55c439fbd77271429c8ed9e0990"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "2b188cf6a69661a603fe54900424f8d719a122e058d6abe83ef885eb9e666fb5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228424, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "219732527a195311b48f5797b91683a55a23b19b44563453c326471f01d6c325"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "da2909cdf709a676327c196673da1d3b620dfac52aa8bdb4244f28cbd02b27f7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "c6c3e81edc6116793d77da7df75c2756bdf4f5d296181aa95100a8f5af110ee3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "bd76ea391b4ce6aa748d440f4d294caee75a9d562aa104fcc38fa2e24186e7db"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "d1284ec541f8465d966e98a44abe4522af328d2c4a772319176726157d4162cd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "47e7cac81ac0f175f1f5af7984b4f10ee272bc779ea9bbb9c6a2d750a5390f33"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "0ec72000ca53a3504334b490a28cc979003e5df4128679ec6891a578cdb65b2e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "3c8a6098b1f6e7ebecfe563b44364783ee03a950a688768526d76cdb70e6eba9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "2a8042b225c2e90c98133de26ee635268782a7bbf7997ae1f6a41fb069bec053"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "5abe3647b36e7277eda6437dd69415c41373e87a9170ace68eaacf69a7f2c922"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "51e97a3269edc51ff76aca8b59fd5e7639c1b1def292537745225337ef0608d2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "79fbf5ed6e134824dce50a1fe634a0d75ed0f3f26fe8dd396244c962489ed1dd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "10a5a35e7cc2e708a363835b3eb3cc03530141dbb02291ab978b4f2460af87fc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "aa50579e89113b53fa2a60f0ec7fbb37550b7ba37f0102121790381bf5475b4c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "b1f7f58f5cde29feccc9022505169d73832e048763e1f3d8cbcbed8fc296eb3a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "0f9ce4346c44ab6f5bcefa69a35bcdc75d2f47801e82d5a29a70c550d3de2f08"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "22fd8675954a1474d4c843798d6f7341333c325624bbfb364c578d58790129e8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "5b3ed23d88ae7fbbdfe5a8433182c2168d215a90499f766da6617dbdef097bfa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "cd7f7e8d8eec570183187a92a0593f22aa91b4fec8a58fba170a155324192cee"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "842fb1602d7c513a539894e3d0546c64eb2f7bfff731718e22b8d13734571f41"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "44833ce01f3cb2eb0d1e94c5073810018624d810038e66753ad0febb6f2cd8f5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "67a3d7bca6c5297f191c679aa3a4b594bc725eff8fbf2ab56f9e72483ded20ed"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "39cfd3111042e2cae6292cd4c3c72fce9f04e43f1e9b9e1bdc3d877285225c21"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "a143d828ed3cf3cd56b851cf564d9cd79e996458aac72e88f6da4c6442aacf32"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "630596a5f75c73ccd83191089f00cc0f0419b30eb402258c185e31b7c2f00d0a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "11fc97a546fd57697bbedab09e530bba30d279b46f8a21414bd9b6e55ddc409d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "ddfd052925b33dd8315d905e155f0c16c2e754395491bd211ee2a07ae02320c8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "c8f3d6a3dd1921afdb26e266dc4c783175834314f63614d3ce2b1f3b9641fb0d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "6494c0f8d623ceca85aa9b982e2c6ef1b64395dbec759742ca7e6b0ed06cc3ef"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "827cee1c0c2aeadf5dc7ce64831e817a5b14e9796fc4810be95e35801bd81f69"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "5fa4cfd1c1fc4d22553368e0ba3c7df57e28a6351788c875284f244d43b34f32"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "05e232fc6050fa7902a8257c419bb7614397d23b2b363deadb879d77b3d0cb0c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "d57598a9efd2ccc3efbf7b67d05663b9365146087088cf0f56dc8f7bb54253ac"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "ecc5411936ae97b36b30a6e476e8cf7f3a999ba9ea110135259eb86e10693f39"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "6642e07352c0898e5c5b206ab00b94defc445d81db445c3ad9b3160390280600"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "fef02bc91c77120082dae0d05bd864e5935d4bb051f31e4e91c004c311977713"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "3345b37c628a8aaa606f83c1420596fc661f6efd3b7ebcdd56560c8231ba9118"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "ccad27ad5799fc3dd5d320030beda3039fe747a97ab2d9147fa4cdc302127c84"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "e14400398c5e205c555423f6a883cb0b912263438676223a70c1e6ed6b057391"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "393d8251785b2c2876da8c1d17f7436cbcb6a382fcebb9622a93fd327361d8f2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "bef31cccd71b7d28c7231940302b72ebafb14f3dbeaeb9616c797cc0216574ab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "70a57e3b152842b658e166206348457ce5bc54c8fb64a356ec810ce7697f2b5c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "70951abb85a97864451cdd0d0b4d312a0f6c3be36caa4e0b15292df14cc45e8d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "531ddd360e750d7a36cc0d2698d9769d553fd18d1b399414e31aeb6888321747"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "f817981df1e71c20641765b5879156b81bc9026039048e90b91bc13c6829f315"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "b74989f04fe92dc2276ca589d54276a2679b4b6ba98a4be670ccc3eed97919c9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "868ca379aa7426172c8132f574a09154be0256aa4f9b78e0d35c22e9abf7b9cc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "bae474f963ffebb313c50f48f214dcca7d66511256126306f483f6c712f17c3c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "e7086feb1c0f72aea38b0f12142a1e89cded9a8cc0fcb7d742283bd183e82d10"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "2e9793284514c388bd7fb613600a6ec6d0dbca584d4cbe9bc4e25c94929a85d2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "ab9d156b7b5a3f8a3e5413e4b79427276e324489cb2b978254163a9508b78149"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "508ff2abe1f2af32b9ac03cc35b09c93f22522a4c2d5ce698b5069e16c91737a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "2ed7786bc3517bbace835f864e58f2743c4e1252e03482d8e31896f554b7229a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "72493ec3c2c9e6a728a4ac179c574bc323d499f2a78c48ede3d8702fdc9b52b7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "c4560d88941e1946483b02e021408552541f3c0da709988825c53c6d35b1cce9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "aa06c815d5aa1248ffc8b0df0f0ee9a68620a6de3dc44fd5c60df6466fe98f79"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "8e67ad19cce81fdd28c31a9a613d0fde4691aa39c029375afb24e563a37238e7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "9def807d4e24c8a8df9ca0073f3d8f04b4f19b658690e00104527e1fe11960ff"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "fe5b57375d84435b23e4dea3855923c88801244b2852846e801e9a9abb303b56"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "4c31ea43f54d168a12e0fc4ce046150be7cca24fd2fc2608febec4f9119f439f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "43461dace36060632faa39bce7b208360d9bbe0b1257e27d49c239491b274128"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "291d71f1be816cdc240ae39843bcf22fd4005da6d62c39c1ffc29305b74d4638"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "6b22d20089cb27bb21a5c410577ea23f8c9d3282f68d98359a6cec167ed83427"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "6e77c929f374b580441d5a4e12ab65f0e314fbcbad5f7861059795620e6b9794"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "01a59713110c7b80125334fd934fe2c8de1d84ddac2f691b956d5fa26a354e42"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "ad10d2f738357c5e2d9fe9af0dd8b06ac5794fbcfca7c9f71dec5d42895823a8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "8f323ed0eb870caa511ddd26757b342e3032031e55cf9a8edf64978a48b646b5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "001717ea6df78a72f947d96530dd0160db7c9ca5e6e6831cebadd425e57e1cb1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "326007bfa658a3a5e27a2568bb5a3217866b72fdf15c9b51a5d1ea48390e7b5c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "a1c966e82ec15791829a6c80af22071e3b1b83f4698c50e00539fca2ed259693"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "95fc7ec8dec63c94d6930efeccd8f3f98e9a1169fbc306c0368ab627103f4b57"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "987c41e71aeaa76adba6b73e909334cd894ec6091ce6b7ca6aedaf1341a96e47"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "2ee43580d500ec9c9d5671624c383a3e17d27cdb0fdee4e026deae8d73e6f952"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "c423aab5543759c415bd65470864adb143e68cf5038061640154a1146d79cea8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "f7df630be5341685e4b7f1e8c460e6f74272c8b8f5cebd2ba045c2c5b801aae5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "85313e43b367fdd9e6dfa787ff43cc24ae2c1a0c4b96a128da3acc46db07fd9d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "abe19bef085580ff3148ac47a4c4ac9aa175d095252d022389f1d2c9c78bdeac"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "faddca46cb5c3fe9cc13cbe809765f5738290062952c067ee87aa386391769d3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "e868e29c51859050d17e881f88189611d4924233f729ade2331b02ab57382bab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "02d9819ac6ab55a94ce0a24a8f38591b669f87936ea725f2f51f713fc9c7fe4a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "f505960d23ff9a52f2e6060b62297e60d1016b756f9b1b219867bae2921d3205"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "762c1f164fee577d556de359fc6569a123ee0287785995bf6105689fb26527b3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "9d1967fd06a77e3c1d47689523a05860853e91bf76ff0bf8a4d2ebb2bdf9a342"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "d102e1efd3c2de08de8e76f1bee45810031b0ceb3e6c2ee6c8114696dffd1ce8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "e133821c273c8c117ab19f85f01e7d8589bb99f9be12789aaa36d869fd67fe15"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "c69ce6468e50a324830428dac3391040d07547f143dd0fb1aa0369c39d9aa309"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "c094a7c74ae2f3de7cbfeda5d84fdb442b40309918919705ae974d389b1946bb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "c1df729f55582f79d7375a9e832f7dbed3e66cc466785318cddd8a7a174dccd8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "32033a521474e289540a0537e38dd6a8a6713fa691650715e9bbb4ede386fa7a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "7517943e9a31ea9c28b41312a0fcbd767738f8e13c8388f0d5e3aba758aff74f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "ed16d02c1fa28b2e3b2da1501e23562ff15060e59733dd38b91441eb962527ea"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "3cf513e586193358b2bb084d1b3f92a4d1fa4afa7ba6f3a6ddc9fb0b5fd55373"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "f064b3da2792f2f8c827dad89c9f0341311297c61c531e14e2892a640e175be3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "379670ace2ffcad2effb6e72afa11fda240617b266ba2b3ca2186ec7005a913c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "f1389210f5e3c0cf1ba1d3a9c7cecd6c94f49294d83111e5cf77fb7704610e94"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "18dd1f05bfd4528c46e1c23327fbe3c9cc5cf2745b334b79475aeafff8b72a05"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "ad55da4945f713bd0b9e47e63e67555e9f2600d83bf13e7f17409d8c93031ef3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "94160ac7533a3063136a23eb02137b595db459e5fc2106beba401faec18cb0ae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "bd3559d524e170102e9ccafc7bf544d5eef7b277306679f9db926efdd5148805"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "27565980dcb22b05824910bf9b52fff4c0b02155347990dd173d9a588f9eaf46"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "e7301ef88580867cd18dcf3615b98b7fe5934e7fb2304b5648cacea859239a7a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "ebefc298ce4cd89ab83dd56a5b191488c2a60f664697a45899212fc7dd4e4070"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "79a6131a0e2ab689af0e6b6f4345eb69407292f461ae6c0e0bb6c39324b1999f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "c1f575cd361b491a7d509a9b70752e0a8150a2bb77db80bb661b7ff8a05087a7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 191832, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "011007cc920fe90ebbfc748ac72f301b6d14ac1324dd9f238a63d4be1b60abef"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "bfac363e9d8b865de13037f04d3ca20faae8616663df30abf58cc41007463afa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 157008, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "4ed3193f579a3e28d2dbf34640819fb766cf5d2c1953816a1c8fc214d0764ef4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "89a0f2f60cbdde2fbb426776d75b88088789149001a8fd301493d74c483b2f34"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "e7e5fa8c165b99839c4c09d8b058d00064d31df00e8f38493df2972cf2d59fdb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 165296, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "fceb05039cb73c976024312685731532c8361c6c37d1d8330864a68a63704055"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "b7b483d8fafb5fab56864c303f546254ed8c2291e0d9c501cd7508b2e700adea"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "dab15380415d0d4e4a70d4bcff80330fc4b033f055e72ef8d1d9b42e9228dd10"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 191832, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "15f4d01c829c53065d27f424fdf4696be9ed08bba31f021ed6584c8c891f8276"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "d8f8ec2da307a52da55185395bd87f83fdc7582b931db9132a97efae002d0e2f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 157008, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "d891b20858fb0125ef9e34c274e9c45a747fc0b9d2724eb21b5ad83102174871"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "3c46d7ea9afdac6fc7e5ed93e1786e15e61d33b79c659f5ab987db0bc0849b3b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "fd8f03a899b716ded6b1b403aaf809a57f5ad05c2414f43058cc31ec4d56151a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 165296, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "c7f905375ed2eb4bbd7c1200d86078b8da47b76693a9ec68720e01f19894d31f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "9a468739d0ff3d1670b5a443750ea87dff324767ccda5f4913663b25e4bbbe9f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "9bb1e775cb96d714e56e4be3921923445cd4f029145a5424bd4e25b4b955e0ca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "e277a143efc9bc9222be7a554bf99e4270ebf768bad2f2b9496c23546439f3f8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "76075e48e8e60f522272215827c515a3c2dd25f382529b07f19912402e8cf1a1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "528f14146fdde0dd87a0931dacf28fa6639d8eb01f1de6078eda1dbee7308be2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "ef9f2a80c1b00a3fe89f43bbce96e57010195936a7c37b12b4ee5e32315bfe77"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "7a929d46f078c3d4b112b86f5f37625ac13a27dafb8f1c4ad018dbb3ca35f28c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "470e5fc5090983e59344a2c7ae50341e57e596c74fe8ecdb2aeffcf725702582"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "63d267b697256e329797d31acea364d925c93a9bc2af7735dd1d69bf582b5a7d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "5dcd87783ea6a497362a628b4ae88fbc1e0d130b43c83cf6ab86fbd7e0c6bb5e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "af18cfed5c464f7d118170129b979a86f940e38f683b88bef4c3271c16dc246c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "e20877cc9abde46a565d4a7f77c95e7aea4c766193e5ac4bbcad6367129853b1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "76129634595249320ae1fa10da8e9b592581e880438b1617ee588fcdffd5a03d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "d1a40a8f6b5225444faceca5e4d6b4221c0105009f8bbd51816c9a4f68b31e6f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "3a648800f82a6935c8ea98a169c0c9b62fd143d20641777e2cf5605b6023e374"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "9f02fa7008164ca67eb5155587f07c2a9ee87bd41fda5d7e8e12914f775e3aa6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "5fac2342b998f02ee066bc53f553fa6cfe81b6cd82f04762bf41bb59bf67da6c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "256783a1dec5a3eb14b1ec5cb994b1037de9e127be2926066154b33ae13195ba"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "96f783c824cc31e62dd77f0bc63052c26edb5d2faa7a48df2b5b9d7a38736989"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "bb0b253d822331dde64a7b63cbb87b2ae9e2e1888199545cb96c5770d575c9ca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "d9ecacf9a31ef0f989f629e314bad2000b59fd2841a9ae0d32fc311fe42057ee"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "e70a783adf0d2a42f916c8826417e45e6447908b87e78b7f3ca19858dedf5f90"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "0f146e5edfdd485d082204925b0aa05ba2b841c50f562ba23852acb37359de4f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "c65a0b8d2ed11a3067e6ff56793e1093ab99713c90a101fb59bf4f6a7adb1f03"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "60115bdc412c2f2a464bb9effbc539ff6df12ff50eae035dad50fe92e11ad86d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "4b68cddc29993faa457a8a85ebd1b33ebb389fb7bb5c3fa6b1cdd381ea31ce50"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "c0169ed55f43fea4e1121eab77cf938879d861d368a0334b36627de31a73994f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "78280c15104d1c53b536fc76c3cc1c6942def647db09702c166b5de4042dd21e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "db5d6d7737768a1bf390673397455f542f656b2a6767cee680b74f3a20b75e2b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "c8cb0a33ca297c922736170afdc7d77746c830dbcc378a4a5c1f79f5349268b9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "d4602e6328ae07b410e7ccdbd88eab932598797046f7386741c437e00ac6aa39"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "802316b4a53ce543595b53dd515cb94ccfe09fff82521347e473d165ead9e8e5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "f9743e051a8b6e6fed8b30798bae98fbf183a2baaff0405f70e90d42c9e810e0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "ba9aef53a734f0143a3f7bfc7fee155a36a92239bd6b2728dca0e2f21728aeb3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "d5f5b7bed27957c0df49bb1a25ca34485308857575da4e4d2f2c509f3666f13d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "b4f144ae2c6a0d417f5f55b3ed6507136668335961782951ac3af994069dc554"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "d0414cf3e5ef089024a25799218af0f7d424f4b941d81ac1fb96864b2acdb906"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "80b333d4a21af0a3f8c25e8cd3d7d9d7d996e48e76c8fe29b85cbf316be3b07c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "ec7041f9f8bb900be81b8b7abc2eb35256c4e19308b68834c3a984850af24b58"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "be6cd3cce91775a198e74456cdfc244e9eeda4088ae032dcc80920338d61da6b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "bb8ed3785d9d46bcb66c8d9293f3ca295bc3f6acab88978cdfc84ff8711bccd9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "f21795fbe2d6487093581f351881b74e435bdcb04ac3f4a2f61fe216eb3dcac1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "52a33a399c7b6dc935f372ef621819fbff37fed718597a5c64ad47372fd6a765"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "c9d19dcae81c60cdd45bc7df2570e8a07eecb0e1e2c8292ab022d52098d94d91"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "ffc657ca339adf483bf64e96c95c43d852b910d366c22dca7ebc696e86a27f58"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "5ab8ea53152e782fc1229638d5113caa8a459976ca06ffce552580f69ead7e4a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "d3197c17316b01a20040c4330efc73d7de34984bfe3929e689f0fdca1f86a542"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "01b1a86e06d49dbf9df0a41264605e9805af6a9091991a9df4c07682af00f50c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "2432b6423eec6c404bd190149725237c41df6ab4a267b0a1e3c11420eb93e428"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "83b0170a3e548ab4096479e742cc9fce70cdcc3ad386bee1e331565a4dbe88c4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "eab1773345f1ec7377a108699c19333ea9dd6dcc9163ef34ad49a67f2c690acd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "cb61f878e87e72fbb30f10b1022413d2945a7548f86dadd1ebaa5dd5697f21bc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "480b866c91841136a7e76508f413f60fd9efb6ae76c568528f367dce38be860e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "1b87ae72f98313cea824f7a4bd247e0a39666ee126a8a83a737dcd05d3f41404"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "cf47ee6bda7115b712a6881a098c95878d0cb774c953f30001a65215139548ed"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "281d1f986b9a7f457a65f5eee27501d65d775a38f6a07f2ed02480dececddbbd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "2e87d0e4762414d1b774b942f8c9d27177002f0e8c9e66ea7445fcf1aec1c5ef"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "90a3b693437ec6a48035734c9d863d2a9dffbec624be91a31c5263dd88010da5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "4717bbd6eb46fab2695c27c3b95efce040eca1e7d41e50e63931d919fc84db06"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "5e3bd6e8b834c13a7559484d7ad375f6ce31bc5cc6cfe729b05c1a792ff6a04e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "27b5c162539610834f4f1d951b28ecf3939ebb5c2b68e13d8a811695f426dfc8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "66e8b6495ddc22e9a93e561f199c3a6ad18e39cfff23f0b0c4e6e3516507db69"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "c7c4b77d71ffe59843b927e4da727885a0d32faa17fd52d5fcd2f4e40ca1436e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "881282825d5b71c0705e972a35be8d5435b2e678b0fdd81bf603eb173c6d1f16"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "adb6f3d829b6bbe5413b1dd5be0b8d25fed42617ce6854f635c51bba798da1d8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228328, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "228883288dc9b532ae038da03b8e5fc9201d20d7315f705f75a3132647197888"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228344, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "0c7dfe50c8a0736a2731bd9ea8f6f70c3c903f92024ec2e6dca2cfe3418af236"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "66d6d7bba54841787ff586f865b7bf0ced59d44dc597b6f70d12412ff75b4a07"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 164976, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "0feb226d7e54fb4e36dfe3acd219b9366e5d8f17ba880b4dad4c8339574427ab"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 197824, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "06e5f62cd20c775d895e8ec7fb3cd02636e21b59332a0a2ae33b41bc59d053e1"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 164944, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "9c4eb0ecfdd51745a2b18dd4f903d6821e8cd8dd08b78d1083ca442dd6152d05"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 197840, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "646abf72eaaa3957f0dc70da43556e3cdc461bc52a08464aca5f9ccf1cb2bd69"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "650f5c312f041288e8802954efedc48f8a718f90d07fb8da7a189a6cfd8b23ae"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228328, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "832664b4bf130dff4a0d5def9dbe0dd5cfd40dc6f4882a3c5ba0af3900cb46fd"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228344, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "4cffb9380eae1881af39e85526bf375684afeb6c7d192d5f1b2c5e1603156f27"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "34b8bb3bd063f72e83caa3fbfa6d6c5b69abe17c131372164d3ce796e9f88275"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 164976, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "ed27a8befa94c79c18dcc47155a324aebed8d1ff9a95b9974ba5a671eecee47f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 197824, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "ea8695b2e743fb1ff1e12e4138d44729f0d2ed1ddf987976f4ed5469b1a72ec4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 164944, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "2d7004b1795a906326eab803135d6d1f89672809fa7c12b78b0b879db7d1ff0b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 197840, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "2bc9dd2d431a717c8b1dd61a47230eee560633004ec80949c715efdfeb6a5f0b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "4b007e6fc0915783b22709e1dd6f24f0424c1e682b3d48e203589aafbe5a7bae"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 183400, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "ee9f9b8c1ee465896c36a204c9ef307a43ccef0a44ff632b115ef28894dfc7ca"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174696, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "bca7769f394ada2b1ee45251873e4793934dbd7ae7aec7f160a52ebcdb4297fa"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 180408, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "161ffe0d26ed727893ea093263281b1e2990b8555b836855e4250b5d5f5b9be4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 173624, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "6f4b48c6e4011e71395f1f100ab2155cd5727511b76445e8c1c23675d04d48aa"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "28511a3416ebb436adfe23d581c27a1f7f1a025b4dcf584762f21b4ad9e9d5e5"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "09a2d304eded922d704809981e10b436a46d6ee6f504fae184ef48a9af0fb15b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "056cd5c178303b1d08a74779a15a04bfe51ea760dbf7c46c7fdbd7458294a480"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "9166a8ed7163129fbf2ded8e8dd47a955657ac7404035405bc167a81e0c35568"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "6e39bef661454301f28bd0e9379d79385bfbd1f52bed485eb9d59001ae2d54ee"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "5ecd18ab8bf292aa2d902b3921a355cc60d0b210f31e1cb54d90661f849d2db4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "e488a8bd62dd0152ca5374935530acf700ee422999b9cef41f2290ff7ca61550"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "8a23e07636a815d45cbfa7070ceb33cd17bfeee34da04856a0f9692c1b4dab04"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 150800, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "809d59b874314f404be49bc5e940db259bade400f703cad143e6fa7fc7d8a712"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "2732f52492999ee2d526a8e3532a1e328eddec1957a9b575eaf0930e56bd09a3"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 141968, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "7762a319e70d933f70d12fa2a214ca3bec35da038ef4376d4fde814182cfc2ba"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "6d78599e3e53d929d13d75178d089f8f5af30b0fb995c104b3bf5f8532ea855b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 183400, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "d4c97b40d1d841ebe4801c932a8b8b589ef46ee3ffed4f40dc26eba8595c2dc2"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174696, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "ccb5975c1e48dbd12dd5a3aab6d0fbb6ff3e017766822fe20bb01803cfa5fe2f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 180408, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "985ea8200aefa3a679d841e4325575be290a454ab9eff2d16dd7be643f9066d6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 173624, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "7bd0879e7ad27f5c41b59f2ec8d84e2552079f75f34448d7b4cc7ef72202fa1f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "1d3dc49b1f44924d456507775e2334104602d5627bae3aca07fc2da0abd37f8d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "cac98a4cd27d8d91c2313440737dc6aa23891d773b56980cb2fa8cf1f2bbc31b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "2983a624bd4fe4a5261488d37e19655f1cf3f2cfca00a4748a2756faf7336b78"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "4195865dfbd2904d5778f33bdffa94ac262ee892fcc814970f590dfb5b1d48f0"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "f602400c0b7c2e619c19f6942f83dad65b8d7a8116cbbc19bef66ffa257efb57"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "c28f1712d3d5bdae35970c3fd5fca08fd2c52873a28eb91e9bf8c000072c68ff"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "f49c326624023c926dc57f9280446effafe6711f0a04d26001f1e184ba5a1532"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "4012b1df323c1dd1643814df4d001733cf29ef048f9c6f84e84b6fce8feee127"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 150800, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "ec84f08679827a69e0532010b09f26f3535409202997b0dc00b93eddd17ee0f7"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "22fb0249cb8ca40a660bd3ef4182f8e8062fe61a63594f318d4734293c3e5f52"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 141968, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "c7deccbbbf6fd719feb537f462172c0fc76dbe743ff138b8ca5f9c8dbfde416b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "afa966fc3caa41dc10d888196d60c32ef1676c0b6642e9ed578d4b6c3240f47a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 183400, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "fd5674940bf656e8cd198c8ba1dc3756e75f58c2021a7b30cfe224a4027fd0d0"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174696, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "6c25a21484f03cd2c85e92a2eabcee33f6ec8b18f8886a3d64a32c4c58868cd1"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 180408, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "d4ef67401ff2cfeddd4db933bc266eb704e994bf6acb965a1980268c61a5c2a1"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 173624, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "c3996e08fd82578e80bd4a853c31dfd9a4eaea9692893a9e1be52e0544b51a6b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "031bfc331949a065b0275d3f028cbff2a944ad8df9f2286e905dcb1e7fda8572"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "c54e78d1697e0ffded073c1e7c89b5de61ec29498001ae401f9a6829761bd3d7"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "fc24f14a4caaf68de2b5bfeba3e031b82e996e2731f37e66a1f8f2e79eb03920"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "b7a7f27090e48eaf8c3dcac3a8785e05aafe6f5354528417ac93e1d4372ce6e7"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "f35926eaf0b084239c2e82a3b4bb77fbfb712add54a915eebeac9056c664b0fc"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "d4704646e369045a40b31f0136585008d1bced3363dc5a7bf73b21ae55be413b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "c615831dfe3ffef2a686824c5e9a4a5135aaa9d6dc0c8f87d096a38f1c32161d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "9bc3bf16ada60d5f10b89bc2d8b79f11e20b503a229757bf58cffb8ae9adcc1a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 150800, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "ccc475985ad0d87a3f255f96b1d3e2cfbda0a41c61ec0af589c20ae56e6a8c86"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "59d655138760a241b287a131a6da7ca539d0beecc33ffcf44ffc25469d485bea"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 141968, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "cb0ae515b637b07b2c473ebd9ae243a90a870b1be2b5097503d76e22855f63ac"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "32acb90f16dc2adb76e1906ff12e86e999ec65e9c11eaeffce4fc428d00dea72"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 183400, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "c49b8c7f222a82ba2693d6b29c71a47ecde1863ff4070cb1d8976d0f39954d38"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174696, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "ff2a75837eeb0e02e873bfb0e03fabaff86793a26a27c8c30ca54abd38d5da97"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 180408, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "51d740f8bd3d87f6a5f8f2decfc8cf12f20c3976bbf591706f81f6544131a18b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 173624, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "9740b78a00ac8e1acb9b085e4d4775c1eed00138467945c2c242c519f525b101"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "9b96744d6a7757b36901b20fbdd9144326da9a0629f4244a8ccf6785c78040ee"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "9a51e5d7f2dad5078e601c5a7f571c904cb522e87557fe879d3b0383b1b584df"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "3fe30cbbcc819a21444615cb96288d09134dfafe9c6b2a504e0214fd23920789"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "646ac84d304560d9f57ff42379f2ac847fd494c58fbc9c99eed4da23da8bbef8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "e2bbdf2c8583605bc2044bff20cc249599085b0b6510ee9436d44631b8f6c723"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "b31ac8ad2eee9fed2c348dd4b98616d8712708014612d2acc0a8bf731ddda077"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "081f6e9e33f477a0a7671062336684ce1a52efae0fe96accc4238f349777ddc4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "011ac542226b450ea0a29cbc6ce2365882c8f0498385d1c864eca6a658c8b751"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 150800, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "63912101b60f17c379e62d739f09ed137e63473c73943fc258548363b280e21d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "fdbd2e4e5a7b43189061675633cf1da8e119acc0d9a9edc89ec5de4920713b11"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 141968, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "e81d9c6a4a86c58071414785b7628d37872cd3be661c9a3fb926b0a00bcfa251"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "85ee058fc8db0aef3d10827e676a6240f98ebb8f58e94c1ed864a089d279561f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191080, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "31f7ee492f0e44160ab405b1f01472e7abd03052a9ff4fea29bc38d5a9cf9e1b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 178280, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "9305a7fe27f8e6d54b40a52b20660ffc19eb5d38e559692cbe3664b4ee790791"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "43e96ca98baf0dbd3c58d9edd0ac75da19b0fa21c76cb689b8f358b8c05b63db"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "75fc831703e824a73528996726f82e4af028239fab2eddf36d5fad6037e0a1b9"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "01ae9900c817798cf21f9bded5c5d5674bbda96e33e9328a570da419d45006fe"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "556d96e232372335843540fd4de94de21d1c201309ecc2455f6ca6fe8f53b828"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "bad2b75a64b0a3177180b1ed3eede0482014c0709a7c0abd1b6bb734ec03af13"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "0b07e074e392e954551f85e44fd997ea1b24a64e23cac4fe9ca6b365b6dcf822"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191080, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "b6ee5e005487fae74a792c44f0e72e2c3e2b33df8a1f1110ecdfef60aa3561ba"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 178280, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "941f2b1a872346c9c638c0eae5d0aed32061ab5cc03e4ce4ffe2005be51280f8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "5003cfc87a749e4d1425f75f1497d59ecfe7dbbff382cd3907ae98e8a2103a56"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "ad2ffddb341f9e9e0ba9ee30b6feb9e2a01f2a2aea025aacb8128bdfb457194a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "96f138482581ddf64d4d3b1d001192ddbcb6fcb7c9a7d45664cf7062e750d830"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "f50bc532203df40a6b61f3801b6aa6e82a3cc50255eb481c96949a7b278391aa"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "e021ae8aa0c01fd6ae4c39b9923051fa786bc4dab5c73813760b0b3e2aba9574"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "640a0c3d6cbce990d0d2b679ee74b9492374b10b8225e090387f6c0e584c9e7b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191080, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "ab0fd88fc3e22adbfa77bb54399c605f59305b24b1d23ae0f30873a4d8b7cfc7"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 178280, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "9661504b13aced0cf81e3e1f382b68f3a3d9e6b1ae73ea3082a2af6b1bedd061"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "4a8ecddff3bc5ef760ebc56464fb8fb39689af6ca0caa534a5173a59449a1863"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "07776b88ae8f22d508c129a9a2ad90d7b3bd6078adc0edf651141cc40a735961"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "7fcbbbe9951bb3d18cd31bc91e21ed73210ff9146ade52de9ee7d67397bab3f4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "5ae11adf21de538cf60645f76a5b63f8178750a89999b4e845116beb8f2e6ac9"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "1da7a524cb6a8c3b3d336caa5260ab66852aa3a8e7a61033c21e25fe2b8807d8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "2d81109bbf9d9d70bd967da9b51d1a6502caa7b19c5b968d385db277bbe52f18"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191080, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "bbb1eeb5579b6d64b869918ef09da570d49a67a0a5df3138d08a06fbaf2c1360"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 178280, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "15123df2fedd47299b344e19a118ce8282352096d1a8c1fb59387480bb8462e7"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "82526b04934ca766666f87a9d9919607391a6f197026f0432173efd32b303479"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "ff443b73dd42f8584a6fef895b7f96aeeaf8a610622b3d9a95a26c078742b391"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "9c2bd0b3e601eb50c6f9c523d8d765dc8f02508dd01f3c21a200fbf686315e7c"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "7afb9565e921ba28110cfcc5bd80a3c007ec22a0eb7e3bbb6b112eab4afa74ec"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "d4b6385672f46ffa8824a7c5d743916f7dc1f96d0ebbb4b86f40cf1b7e628f59"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "93c8f877b9e0953bb208042990831e507b5037fdc2ba85b1422f68ae7c5a7bf4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 199864, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "a55f2fbe5d4b60d2399c8444fb48309bb3548969a5aaf292f5afc87f7a29027e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 199880, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "922f14e25530b099698dbd220b2afb86a9123dd80d80ba035dac4cdf57ed8a74"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "d27157affbf6189add97f01b1b2b2519d072d692528d29ad01435a4b353e4594"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "4aa253832371e82ac2a9fae3caf2e70e38bedfd81027b573d6c6d98fdfd8dea5"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "80ffcf01187bbdbc45ac2bdc6fcc9b3333cd175f257be1b2ffe3be0dbff5bf40"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "2bec33148e5a9d5d3e378d63a3a88fd29b7dc6ecda36136ff9a9950028232c25"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "2b91df54706c7a57d42298296628869652b0b9a20b5e7d2a6a0a9feb04a26d75"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "93124a4454871e3ee336a71a04434d69362eb0461bc7a79f1b9eb845027e180d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 199864, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "c1073f9f6315f1f434067cff84f1f3deb5d2d1bb5623d203b60088ef2c578b3c"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 199880, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "6302bceff1497babb53d5a3b3e52c137b16ba78d5a615dbbce1d973455eb1681"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "a1971a127267fcf70a2a135f71f22a0bcd7848eff385853b9e6c2ba9f3bf287b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "9ec2d62b54c8e80867442962804fe9582d16cfd73a0740cc6d7bbed4d51dc665"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "740a493545992b25108e95f857191b0277e1810f87db59633a0bb5af35212060"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "03bb2c654012a22e4ebdbd10f999d29aa097abd0d9a18e300733ede0b5be65ea"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "f5f52763005ab0fa827e05aafab703cbc5260a93b08756ecddb37b1120467942"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "cbf1f81934267e5235e3bcd73b74e32cacf3b512abec91684c3593684e79a1c7"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196792, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "c1042a92cd0e6e0893bf3dd33e12368b2f73e274726b4b3c2c978532caf79177"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190136, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "efc59c425988284993692f0c456b90c35032df4f2e45fffcd3d5cffc598c25d5"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 195848, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "611467a4566966e8e22623c4e4bd8e602c3a2fbf181013e6b6e04cd95e78fdd0"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 190088, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "3722468c171badc9268c0db4f773df8333a76ff9304a601ac5a2c49a7585d6ab"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "a183fa40df58aac53f915d301d9b7ce50b5a9c65bf76e9de28a8b10cb4e17402"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "e6ceb86b887616703453459cdc9d61ce00cccc9fc151cc7afc80411c4484fff6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "dc751c82ebcffa597c7021aa0e51e49f3efc5f0ec447f8c0c9e45efe0d2fe7a9"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "5589f933065d3eaa9dbb0618208446fedcce641a8103292442bad0b9bca22287"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "e7f6552011b5a8c734135dd9d070721371feeb9cfb75992e3cf42c517a3355d9"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "e998e8e6f6e5305d1144f9be50adb05b07241a9526efd9bfc4dc9525d4c934cb"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "4d1445ec37616211a9a2d712dcec2ed15a00a2f063b76b09bc11b80bbc37c7a1"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "209dca47167a31c7a78ed196e47844515f572654df4813fa8040716a8387f8b9"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163168, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "de788ad18aaf80a649d8991fdca0a47d448c3b13482be30ec79ba00f02e671fc"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "de8f7528305d715af924f2925f12843986b0492c378545eb11301cf82b3deb16"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156384, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "8f7e317f782cbc48bde817a5b4eaafe7ec0d7b702ab17732ba8ccf62e70c150e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "546ffd584989c29265fa7d4524aa0146d5d91bfd00cec2da922a8b821a710e78"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196792, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "f2ffeee9c1bd723390039268bf10f05bfa95e2f4bdc4a3be9bcfed71f3ab9ac5"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190136, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "5a05504e670ace8cd7b2505c670d963bf4ad92c14d620fa2b7318000602e580d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 195848, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "0646db4da64253a3a77ebd66639a933fb717896bb10fe9aab4cb535617a1900d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 190088, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "5ae9f13c34fa41f11dd9767e990572d41771dc2603f439d88606d64079d5bf2b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "8bdee1e0f59aeb8ace2329211992768a037605e8b11c833db8bddcc16100d745"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "a9a0379806396fc4617c9a6cefa9afd40f60494def6d8b8eed91a917212a60c8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "b02c4484dadb2ea0a691077397467dd38f1e755aa03fa6435156750ed8747486"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "b4e91f372f93fa08ee849d751238ca69887f36362ac98c090f41b4ada47cc4ef"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "d1c2577f2c342e7e966f54093d551d2f30988fe21c41454c8697bf5bf13ecd97"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "353289149f43ad2b4f4943cc6ac214ee866be6cb70a9ac5b122b2d4bb1933bba"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "91a930220fe0328bba2c105ca79b02fd7b2ceb16e1638435664e3a7c195a28a5"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "9da76155153b2eeb3473e11e70aa450470a668e9cf2669b06b018c1d4696b3d8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163168, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "1651d82d14fe7e0a14ac9797fb98ab3854cd19bcbf77f7614a0cbcd80cded8ef"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "6c5e3e90ae5836053ea014a3116a34e07014998f9672ffd8bf5bdbf02de07a38"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156384, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "7d0de02a7a836fefa1a72c1a7adb7deb163042c6d7911fd705b160178fc0dcae"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "71ce3ae6898d26924704c2c5ae8921ee8ec3d231667a13b2e0619901b5aa67c2"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196792, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "7012e6fd9747fd9dd9d0f9afac397c06bc2b5e7f79b9c88b72cd14ae866b4861"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190136, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "46c6d16801dec038aa814998273b6b562ffce2c61f94fa72c5c574ba62bb4c35"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 195848, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "121100d076b38ed6afc658a527318a21662c9c079405b6414c9e1b5fcec33d41"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 190088, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "0812cb0c1d3bf3bfe7cfaea0c675b7e0e750b80687747418b5f4eb89edef5b63"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "58cdb35ffe402cc61eaaaa0f232d5f1d0afa029c75f17edccd671a68ac909503"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "1a71716ba1a1425442cdc285ebc52b4bb5eb971c97e3e1a98ef31ecc159f73d6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "0b9e5eea370de87ce2105c5fb18b2bcdab1deca5a90d14788c4f478264840909"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "abaa39775fd8d079b4e7757cd8df421c12a1934676dd4defbd44c6d4c8224543"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "1c45f8e72f2d28c1e74673fe87af1bca565449e11071786d27e4b2f536b8ab47"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "aef9ab46d0d7f698311e10ad3d89e477e231f34481df3805e2ad171fd56265e0"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "1ad9f45526768d5f36e9de83c75e489f783d1a86b33272e1b198fdf795a07965"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "2834714ad3852fc0314634af643d08a36dcf2acfa758dda38a3f046a5ae4c4de"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163168, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "dfcfbde1f9cee50aae5e657168321ce26aa7a1022275309b8a6a8d73632fe5c8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "536f5726298da8efd87acb4ab4310b631904a6c261a8a8cdbe60a1a2e21fea29"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156384, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "56aa39fb8e922e50922e268ddc341499facd4d7abc85e312834ccb9a8984d918"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "868008d32382ca5ac4cd5af191e6d3ca0274dc22452c82e71cc4b0b2814d5154"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196792, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "a24178a8d8110a72c5d9de21883193baf4e50eb67e73481ae309e5caec98d561"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190136, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "52a544473502d66b91711f004e44c1a5217a7c6feb3837d60493a0b4a520c898"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 195848, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "78fb62bf3f9230172a2169fa1823ffb964565ba62e070a9b6c7efa14dff45eee"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 190088, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "4beb129e8c01941bc2048d4f07b21fcefc63b4d623f2bd04b6e8aca8d87338b8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "be509305480681934dd20bc17584b4c02ad05088ccbdf8ccec522c3bf05fd41c"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "13f637692b3c7577a3a534730a597f9d4049bc97211f275182cf6cea0a9eaf17"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "7508203c7f2d99143677c1e53260e5f4f4d3365d07cbfaa0dc19b41f077ad269"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "c0f161aa5fb0571056999520fdaf15c50ebdb4119d71ae5df295b71fa1b4e4c6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "fbe35480fed8d4fefbc3f1cd6fae95ea7991a3f7b2c3e014584d9bba4249611c"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "54cf3f0353671ec7396929a9683745d69b864592673eb101a24d9f8d08bed6c1"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "7840d4b88c7519ed7eedda1a4a917671bdb71ed131acdb094113e6bceee949fb"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "80d9ecb1ab6a7a874da17ef569c0d834370c8181270b443b94147d3c074c02d3"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163168, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "e891f222d4b3c634a240ca21d047e208e02e182c9965b66a1f1de3f6ed0a4c6b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "50d76a6c866f24f469a7073c1d8265b68c46b33a7f319f5e1c035eac7e61fd86"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156384, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "516f1913dc904dc7c634d311fb2df2deca77377e528f0e9b0f8495fffb7c3a07"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "a9a6ead6a792d2d8c88e0e5a71baa522ff2798b9de56d02b15f0a8fe6fd9c590"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "f28327114ead90949d1ad864b512d46e646dc1f88feca2a68402438547ae5914"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "e274353224e12f050b6eee1639c0a814d352185e813e7bec2ea5aec22358af2f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "6270a92d4e6476e9f00896daff1dc3cd45658b8c019f37f4d2874e50fd97344d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "48c60d47c7f26135187d49608f3787358a8de4ba2dfada2022a30de19712a91e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200808, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "5e630a3ae789107de459ed076248216da809725cf8883c7e2a0e7662843dd961"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "6239da20295c5a892e01d50c7beaba56a87ddd614af2341559b405944b41c919"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "6133dde45343c88e869e4f320dc6b99958ba216421505a70c9262fa6396a0d7b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "e5acb3f62298c03c8890c06eebe5850f8464065049598ca3d5f915729be90d3f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "e65fdc2171f2b4a5171307f4437d075671ba048ac4b5a44e431a17d2238a4195"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "a2ab0e8be36401d0d5f467cb779c7567e72f5ab03992a330a99fef2d4221a86d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169152, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "5043a3a9f45000f9e3951e16bcf197e319003f11241bdd7734e3e23be6af5dfe"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "fe3ce3cebb64ef68c57a7b6ee6d486f7c9564a4ea16fba1e6a4b81abfa2efb56"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "bb0699dbaaba7ed9347f0b246d6fb78c854866d96e4a758313f07931cbd31d9b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "16fc3531a0e671f749751f32f65e8500e3da098e12f85b20278a44c4f5a0d9f5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200808, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "d959b932141fd8f3ed4f2d99435baa4e57bdc6c729fb12942878f1dac197bcf4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "5157c68c84eda93cf75c68f8dd3017479cf6465fbce1a2394532c8721aba75f3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "4ed6179f587c7dad02ad4744e464399b5950c8bda39ce34e9b72dc72f2b88d9d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "3e2681770d5431798b5b5b5875b37196dc1f97369a2db22a02a19607fc1c3f2b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "36058a93e33b7ca1896ca33667f6977826adae1381f4c034760ad15b175640e6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "7460ec04a037dcddddcd68f10a54244914a3ebcb6e82635f67744d95cb0976da"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169152, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "d2174ae7aabd5975e3e863fc9ebe5fe1c908bffe52f60fc8e9df999901ebe541"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "ed727af5c059d7431c6b282c55dd6dd276814c8d01897cf4864ef7052d7e9a90"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "1d34060931c0460a4a7612dac949e6f23ddacec772ebb5c9a5698fe50aed9646"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "31a294195e4c921db03a20fbe78c349f400e6b051fa54a2cb39ac5d9d1867a9c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200808, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "d9e35c2bdda3c963fa79b4a59258afca053e8af7ded1354c6618480c901abaf2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "82835c2706b30184086f9d94d618f9cb265032d6ba49d4594b937502d30ab75c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "2214dad495be9c7af883f10d27673212027dbf5aab522a8a975ad8a1e209b8ae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "ca432fb45df05fc3d3a11d07fd23e143d4b8152113cffa4d27b0993e71db02d7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "3f1e3a5f7063c233a76295cdf0ca7665be0bbc7ae9854e43fbc1979a0ff4e306"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "170b864cf81e33f4f9f957fbe703b313cbf61614fa3dd812dee3ddc247ae4c1c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169152, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "31c524098b01f0c368fe8194cf40766cc5eaba3117bbbae57013811ecdaef2be"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "f993b95b653cb52c6849901977516d16c7af3257d9e31f90ffb0a96df2cf7ca1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "3b0a156f2efe6e7b4d0a49501fee1c3410c822b24cfc97fbcc78282562269653"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "0d492c75c631c88f3f9ac3dc7475e9f9b469500e56ff5b946dc4258abd7d4717"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200808, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "90fcbf98f89e80fac08fd333a5b7be4c2161d46d11cc592d06491409216f3b6d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "6740bfe3305e2bda1c19359b6d7966eadfd8bee087df7f2bdf1a0517aa5742c5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "2894e616f8d11cb56bc4a98c83cba3954f75792f2b4c8fca9ca6927553c3d4ec"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "7acbf1c01f28cd2e0967b7620245286a8d0372afb34950f9952506159cb912c3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "d017943b49b8783791c28bd32d37efbf79110b6e2329e8b69ecceceb82477bcd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "cf26f773234e5521e17c505d826ad1205e8bc204f97f1892f9fff6cd9a0b6d83"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169152, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "74f855675f5555034575b3faeb3e786fa86ccbe3dc073088bdb0079440ca4ea1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "e9322f6eb90ea1be7f54a9da5581ae6a2725f436294449b6baa2f1abebeaca7c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "7bfcc434e0d32b2e4e615608ec7de2efb22ab743e77e2c69e0d99f561d4c2b16"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "d9a17bff89249c0b3922eb910dd8ba01266d3d94a5b55391eb6301d8f27f7be5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "d6b81e44e96cedcc8d850fc5add39b46e6c8b7dd5a8ae962f2fefdd5cddd4f66"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "4a09e8f751ade609708fcb367b32417a10ab97b6fafd4cd3e33d33836bc63fe0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "ba08e48c9ca252ac4e1d0c013c53b7f4237a30f20ba63a0ea177f033e784c62f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "01d3ebfe3bdda2a7d4d2363409368cffcdd0ad7457581adf7d2774c6123239d3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213608, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "2949153de05dcec2bb8dbbbeb4b4be9d9aff37d7f6424527846745087aed64b5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "9d1877ea41abbd442d320b041f52556dc9ab4c994a59bc6b54ba6b7e4f51433d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "7ab4a9fa9b3391aebec7b539685e55c10ee20a1266a947056e2d7119b930fd75"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "d2d8c38d8d4881f99efe65abdf7a6dfae1b1008dee1cff13250ae05668c83387"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "2b078a5721087ed05df6e414fc1e0d5b8859202b49e1ce53e57f1f3c6157be18"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "388100edab8a923c42a9a2c4f577009e5ffe9fea7bf739071c9d820485232316"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "2d0342fb788b6af740d964e462abb8cd0f9b6bd90071319a92e60c24889e4b94"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "315646ea34106815da0a788e99aabdeb7bcade85ff9116ea122a7b9520415b5d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "e0b5bf0c7ca36ff5619b504b150c00d23fbf2a840fc667f8aa2793080be909a9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "8b0fc29d343ea11888196a1ea1089d3d90693f38502a7f8f01dee5c69772c1b9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213608, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "7ab9eea075187f43ad5fcbb2fdc84d74b3f97da0d4a7bb2691b9cebc93f5a79c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "5165d77e15a288aa7e351d35603804523803f22000cfc857449024ecba2f3732"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "c628d0d6a3e2d6e6f78cb2ce34ec4c8b9a7c46780e5354f0db63da1c7463bc97"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "fd53747e760ca6bd407968e4ded1896f59474187ab709ac1b4a94b5283169e5a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "e05fb675a72ebb9732c4b3afeb32392fceb8858cd97162664c1ab68dfebb9bbf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "8bbad72c16911c4329c1dbb481257ef6c75bd74fbc1d93424d80439eee927f65"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "1263c40cb8c20f7bbe92e5f9ce204ecfa7a3225129218f4fb666041b554bab96"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "8a5b1b7d9187b340e7d6d8fa0fa83a66661e40cb02bd34148af51a4ac133f191"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "6422ea67dffb95ad0ae63d8719fd4eee862fbadf145ea7cdf0c5dea18bebf90b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "1a8cea3e212348160d65438d6bb9388bb3a3eeda4914c45f5046366a05effe1e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213608, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "68d12099ccdbbd49ab0e7ddc78fda2e3118b883e4fb836deb79ff328b99597a1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "5ea09198214a51dbb26fc02fe1c213c22326ffcd7b882de77bc2e56e1397971a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "7eca2b36cb0941b3f5e94b6fdf4fe6dc97c79c49a29def830332c896a5db3d9c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "b5117232733cce3bd6a4883e73e4814240c019228e6465874b782a9619e47a5a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "82dbd1f1a66e1322bdfbe3a7fd0cb149b60e87d82b470e748cba3bdc099fac9c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "56c2f5ec3f7689d4a3c90fb0849d3ddf70c2be185c789d94d037401c79fcf6bc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "ad76e687bea8cb3c816923fc0098f02f94a38140ed404b405e81331953c73148"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "46748200799aab64e2d3c77f53c8911f8934546ed0f5c2617e37cc647769bbed"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "1ed39ca3f635354021b66ed1d1d9427c60bdbb0fb3d69b247566b4c4fc556ebd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "5094f44ffb5deec43408b29950702147b1323b110c1adfa0997de52bb45bc013"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213608, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "7be384c0443be1052cbbf9aeb1b0fe7d6bdf7e463dda73de368bcb9e40a8c0a7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "1e07307fd9b2a5c20cbfcdff5ed829d486e9aafebda51d8216098060424abe38"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "b81b640df88c97e9fcda206821577f6448cd81d6418d05f957f2f741e0d8e6be"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "d7652a39f0030a1c3f7c1f7302025b7a722cbd5be0aa34394ae5b5ca83fd04e9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "c1e3b9c134828f4342637a0c25e5eca762ed01ce13803f0bc58a6c37fd63d1b0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "67095cc32bdd40a017343ef1bea6f701c12383b74a029fd2ff257f0279a38c42"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "8b6587de598e903b4132227f9872998fa34cf1177b9da80287f5b269759303b6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "56b57e26474a9dabbe226ca996cdf294695d62b777d8847d6a10ba9fff625a78"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "130eebeb043a5f6d1ce1b16127c6645338d9f076e75b9bd605354a4ae7f51939"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "815780c4511e5e91b1ea1c323865381d21979c74fd3c3d22309d4c6b137b5d97"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "05ab99141e9188f81c5e62be4ac7b600e334740284c922d03cd8c25047f9239b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "3fa5a0f7e958440d358ffe60a64d11aa451ed71fdca43c3478330236ee6ae9d1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "b682ab39929b90db5792f25fe8e63cff0f6816643bb527b52f33fa327bc2e313"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "1de5ab226d4f8c787943599d6b75bdfb0e6557ee5d32e47417a62d5b199301d8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "49b3ea182675b7b5796bbcc19c152b567961cbb907bdbe7637d641138f94e863"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "9e791c8df6fc0435e2e6400cb295e0e1c1ddbabaaaf0a11d395c9715033a1c51"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "1064794f1a1cf7f5046cbd33d8dc8aa4a856480f468b5abf3e440b68f06ac0de"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "67c6937436c92b38ab67c07762cce035f17c6a8a744a30cd4b833159b3c5ea74"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "017be9578ab2eb1f44bdc99408de4f251e6b6c5dc86a2a537f52e95d19a89e90"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "fe8418e400af92bb40f7422f1fe0d8961896af2b78dd4fff47809d4b06f66ea6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "90b5f93e831ca72dd697e648ba0667b71b830233a9c1af53bddbdc491f03e6a1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "3cee27afcd037ff7ad27e89e273e14cdaff607671558b3c22a801045202e1cae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "319f56c11b039ba62d3c0e35aa1a9ef584d92e747bfc03c17fd22dcb556f76dc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "a84c6aef0d55cc9d08bc0818e00a53a230091f32e37123fd0ca8c372eacc815d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "85010e0cbfc7f2da7471409df3f53c0d921e1875ce829fdfd93be1665850491a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "f990d884be23c242acab5c97f8392f5fbc69c413c35decc4fd3202f1c7a57394"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "e40cd3a6b07be7ecbe93dc34d50da88cc7603b8b7398d5f6f0ebf21db5b971b3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "80f380f0d86594eb22d2fbec3239a8c1ac928e5a6fe67541ce098b473ddd2b12"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "ef70dd510ccd339a6270644ebfdca112bf8f1babb3f5113a20f802868ac2ce84"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "5695c9a251b4c7d4900d2bb646f189726c035ff224e7aa23a12a24bf29f1377f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "9d50e658f896285aac1b8c431e9829d38f3e2a368e43da3cebf379483bebf1be"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "33a4277cf9bc7b6f200d2265e66d10c56114c76c4cd600e48383685ab64d03b2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "9fbbac99f9b3c39b845131a8cbd057af60593983c50ccb45ad1a032f5e6e5096"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "6c9028635816aaef2492afb55937c0bbf4ad33cf05dc7b2b6b11ea4545db3c10"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "c85d42cd6429b776d383af1614eed462759a682aaa9e9c5bf0f4106d9c89e03a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "46f2c15028b607caa92ff46140a63ff54aa0259386ab31f861cb8549cb61f9be"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "092c839d11cb0249805c71b9df4da80c8c1d3ba365783d80f51ead256afb483f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "7df776dc381adbbe2e76e4c4933c1ab8c95d9d30e22dca889ff4f11e7683deb0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "dec2fe38443a50760453d5e0a1f43016a703f4ae8058b919a2eee599154eed3d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "bf3c4637fce9f8c18796c0ea78f174e8908f8a449220547d37a8149e9ade8d91"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "9058a8207f3b4a1955da6dc3f9a765f1a508c2c672cec991ba885121eb92cb0e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "e20dba9627dd9d203a45203ad58ea97559a9714686e12fbf8b255dc2cffd92e2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "892297b75502645fc347ccc22f1358521c42a2586758c8c8f83a97b9c66e290f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "50f75a9c69338b520aa99c846e8cb30329c945701a7a122aa917374be562ecc5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "466efe321b0fe49eef8c1cb2589e708e18c7c37b8f9d5be7d30f424d6b306969"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "ce366600d8c4c211ceb3ee93f6023beefa6f9a5b2b29da2159e7a1d437ad5350"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "b0c6d0eab6d01d77240d5a57e8c0a1afc833d5174cfd757de8372b1037c11af1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "6761d0e7256409387c6f516302e1660a95bebf9074f13f48b26ec945efe2d3b5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "8dccb8878baca5d65e7cf6d1681fd02f516d4fd8488abc4107f7e9ae3acac9d0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "80886331db76a57e3017e3d4a8a8da9602ae0980741b67e672c61ca7dbd2a0cf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "36b804a88d109114867833900abb917ca13c198ffb18b02e43faab0b43998727"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "468bbb5a09edd18b1666747b95eeeb9d4d34172dcd6143418849c5a25eb0f2fb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "8030ebaa5ad4ec9e093b566d4db375c16c357875acf839548601fe157343f76b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "b34867e27457e58b6083aaf153c6dec882685322b95f669a51143f7f4d3ecd60"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "e05982fd218c049a970c38862dcf6a785a0bfe4550d5085ed9cdb5a54e08358e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "cc4aa9606368d20ceaf672013eeed50e61861710ddd6e0fbfedca318f22a4bb9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "a06beedd4ce398179868be130798adee654baea2fcdb674f211162b8ecc30798"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "d46ec92dea5c4365f5816d8e9de3de13b7ed3767751eb44e0b2f999cd5b73084"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "793c0559fc028d2923a562c6bf27ce4d049e64c03f9c92605aa10075721f266f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "55aa9d48b1bdc9df9291322d647d40429b9cdd7086bda10f6af73a289b130d49"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "7d36f3f6f647ccb01e50f96896efc0ab81ae6fd00ae3885c26b0087eeb16aecc"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "ca34926646bab08d4ed5f56d76188aa2187298370228e4382e77a9733a7f4329"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "6f811d1581949b5c8fdf56886274b6c08e653bbdca4b49ab15eb9c1e7b973d25"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "3c57baf9f907b0ae89b77034957ac9ee9f1f47602b80a1916fe3a662646b1cb1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "eb51b54da9975a58633591f77677e3954a874e7b28192b7530af79e89db5fcbe"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "0e070027c0cb8497155d69d5d62e90b1ee11b4cd8c9d1e04c174303d2bf0518f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "4349282bcfe995582457577a854fd3606edcad0ea0efdbb479b1a64eb86dafcc"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "6804adae0ebdde442d3199be9b893f612902ca91d6437891641eed33f17928f2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "664620ed0abb83bab50cdad0907087aa04eff6951c77935ac7564c420f49b943"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "05ae5bc8e96261a537c892aa09c4de53fd315dffa09410022adae8df3598dff3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "1c9b4326171deb66bfcab613c59ec633a1d53fea992859703756f5131151bb78"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "a902a8bf503eef7e8e8efd2f311b5e26952d21dfc9db810c467947de9eb7f5e7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "1587afedf027c4817067d1203835352696d6c0e0814d667ab15580ebc61657b1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "cc06024482b32146a7b043cb62286c8ac9f1c8b4e955b302b24bed53d5b5226a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "8bc39d9d12cf776c1396933fe6cb4892f69eaa4d1e62d4451f08f9301ce847d2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "e887cc61cba5da2b00f1a3fcfcede397abaf4f93fc432f571c70c65221bec4ee"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "0c89da96d825b1814eb7305bfc157c04e0e2be026e25686e5a100d320271d3ed"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "95b5b7d2927d86d39e5b4610164fd831cbd416110a948fd7a336435ec8d2f8de"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "ec72bd7b50bec190c2d5d9416d7615724d8001837efaa3ebd1a963dcdf4ada1f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "367e87ae7c24b53035bc8862e9c4482c6702556374442eaef7ede6ef131e80b6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "a6902368ed5b23a9a96a8b2d6720495a77a0c12ddc41befb3f66cbf97c53fef4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "6d88e2d03713644373c5a4104821f86a55703353ce12c8403649fc87206abe86"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "cd6ff849920ba1bbd59457dd9b3bee4681bd31c71a978db194282f2788aada5f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "b012f70d23e2a2e7268171e53258743fb08758e2e6d2d39bdf460179ddf79d80"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "aa8762ef5826f93300d15571ddfb62e4ff7b648a7c7e5317789e5b09236c485a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "085e27e349252b373e7b229e1938115d860f8563413a5d25a04e9efecb2403c2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "cedd47b3768aa40cdc86741471357c29a5427431e73ec79496281c33f96f7154"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "e5b81ee2a34f95496bc3f67590477dbf08970066ec2d67ec583578efaccf4af2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "12e0d206273f06daf88a916532f2201634fdcfad02babdeba1fe6619e6c69dc6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "1326ea2068c89f532da313efb43fb76d26722a412fb237ea5faf05fe55e2ba3e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "4caf0a4e6744df2e06709f8377d5876c337041c2800472ef8398886e721db988"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "e8d46eceba5f1bbb5c9dc4d958124942a90bcf473c36f02d4432d0428729a305"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "da7045c0c4ae62fe467c13306308c490a53b0e647ad3e579cc93c1bd2e8c37e0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "79c75df3b205b5ece33586a4d6fe1d99b247ae69c3b762600c1ba171d183ba41"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "f53dac7f5886eed204ea3f48ea6f0a78ffe8f49cef08276d584fac2b7446bab9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "9710f0f0ef56824e0e946502dc3d56031bc9b9e7eea541fd2e4880440361c0c1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "184181fd791889473dfe377e666708451060feeca45f6a8c98f730d1b3c81c05"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "97595bcc9fab0621730d076460dac93e666e69abdc1ee47dfb453a76f66ca4ce"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "90ebd6a9ab2b4f8a69c0de17ed4161fbd69215b654ddec246dba7e0263de0f9d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "b43cdca46897fe1ab9623fed914407ab9f025587d6ea172efece82e8c33a2664"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "cac548e397572e6ce9e6c0337b31c554502f9d5c7560274432942d30d714bc33"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "05db7629339d43326b7b4b36439c82e74ab77e55c22a8cd17948872bbbd55230"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "9eeaa6bccc5478b287858e5f77b675a05b0efb040a6bbdb98edbb6810daef149"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "8a155852f9633555cda3e271f21aa6320c403239f3e725aa66be7a9bc712ec24"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "c6728f4bbaadf68c5c929060ca74379b4d3e85d6e28772a3c1dbd341969e1667"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "a688c3e3addb66347fc0020c16f127b1f3b1fdbb152b8abd312931629189bbae"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "1b01bb8c2c995e53959921dfb85aa66e7f6a5cb899a6eaf67cc32423b4ce4a0c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "101677fec64c118cb46792fa6c5918e2b2ae2a384f9c0ce00f28fec48d5e7a74"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "fbf4d3118b33932916c66e30f58c16bc4fa1776000d05a12bb317e276f78b486"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "2cd7e67bd75cf05c5bb839106d380d296d8d84bb154f35187cc9efaebeb3bc1f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "a965a991ec7e3614a8ea4674f30a45316696091e1e2f1cf664b2c9b4b2ddb29f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "bb7f6a4f36ecdec1560d3df08eea55da76f09ba340fc747d17e4c620a6695f45"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "20cd7c369e8d542e4693d414a26b48b1edea438a42e0f810813dd06a0e310b04"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "b44b5efab951de214cfeb1ba1e98f9e05942922b975923cdd7735b8a395d237e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "5550313478be34ee071fa4b55a668af5388488a5c87861c4a8cd295c32fea544"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "813586448f711eee51e313e6fa7435562df3a436663e6b40703760bbef905665"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "6b19d12b634e19d5ec7b58654e30b8be68031c7c1d39dbf54bc4911b695e0bc8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "16eb4c50fa80d03ad47f7f2349bb1537ac2a7b7fd888a3a75437b68a8d1c51e5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "a5df7eeec7cde8d706e7d9ef1a744ab7222cb9e47ebd375974ecc974113ceb2c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "c6341f6e6499c1ac545c64296cc79d9a7791db3bd8404cb559b7d4aadf11b433"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "d6f7659769c2d9687c956a034b0b636dae3696c0bd95ffa862112157a149a564"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "78543db2dfe9694bcbe73e95704ecbf316e5f2e7d469aa2119043b2238a3366f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "184a4688de53928660e8c4c3e850ce816b6717f63a14c31b5b719a7e001cf8f9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "82df6671d6499f7dc68957c50da7ca7ba3423f0470216413d90449c150128b81"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "04b63e8bd2e3223da9840f80d613235c627aca73ca41db1ce59c0ed983e2974c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "a96ef62dd8e64ad90c5a47e227e2b816adb23f90511834c3f51162de4b863065"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "3f830bd6403f4dd94a02df17fc6ecc6f94969fcf353f4594dcca7d7d8c9ceea9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "93ea91834a34ae706f6449fe602b58c7eea4ba71197e193fb8e22fc2785f77ce"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "a163f50098f57f58a46ecde34e519a1afcb1324ef79ced3c6a9ace9ec788d4bd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "ca69b89d6e4205313f45a3317c074a08674b20868116db8176f3cb2c3718a687"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "46992c86a7753f4fd4038e18b3738cdc3cc4cecdec01183c948969074a0a5096"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "a107f92a609fe29b335a4c0d2abcfb4787f94774f9249cc2b25291fe22f4d399"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "b1084c42201ddece309642f35a4c1118f01cd2a2ba14023d762006609e0d236d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "65b3bc34554962ee65171108d4d223bd61cf7fdedc9570315f9e8bab60e52cce"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "d7e5e582eeb33be60729d7cc5ce86fc9d436f9187d7496170a43659dc448bf10"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "05650c34b76f356bb9b0da826af42c7029c4063ba942af76323a3487d1d0331a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "7e3c2f4e1d6d941e0ecb85fa01bc6318de17be5e92ef99c2aa114f290a3c433e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "83a3b1b875ed16bb53d3c0c346e768030fee577e4dfe11579c6629842a6ab03e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "7a3dea1480e0539b12e87fc4ca5bcb9fd3cbf98163eaa2ec18d6669594c4e067"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "e8a3d5bb0e4e90c7c278285a00202a1e60fb12e18afd04daf3ef7e6fe48169c7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "e9dce00e0190b3c18bbdfe972fcb0be2263eebf02447d471dcdcd54d6c183622"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "ed0472a529364e6b7c8a4e146f0302f74eac221d955afea256ee655217611543"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "204a0273ffcb22d7b9f1da171ec34a943befc34b4765b1ac26d9f8c99ca42287"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "523b8ea6a04f72af430dd4c15493ccbc6cb664e44c30e4f3168f77d004c4214b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196976, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "d426005ddb805d3aae225dce013fc348997ca1efc12b0471cb816be198b0f7f0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 196880, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "0a390ccce0657859135d05ea699af7f962bf3d2d1de157f80ce06c172e5b90a1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196976, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "50b7e430d55280aea2a182b0d7c36eb9c3281a1cd0837f224881cdc99aad123e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 196880, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "a0037ccf4c7d17a3f1303f22e8589b1c99b7a1c091a7d5781d299d4000172810"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "34661306091a715e1c9e83ba13f67efef1deacf6a51837cfc8658be9c22aab34"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197744, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "3339aab8a2d542267acb07ee5133c627b62d4e42edc692047290096e5516e219"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "749bbad0469dde247804c0bce98590e319bc01bb7401a84e201bf302bcc567b0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197744, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "58c0defccd617035cad67e219753abfdb356b12dc8612d8b167897f0eb622f5e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "9932083e5d5f23ceca9e2cb1d10471a9093b29f612ff66df807bdeea72a1d600"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197744, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "5dfe0dfbbacceb3b66dae7e22492fb1b78f60ae8bbed3304d45f1fc6b157822b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "7c1038c67b6f1821d50af4ceb11eba88ae49df1f35477a755d44bcd98afb54ab"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197744, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "12f22b176f947664d0569c9b930351257fd6fa43cff16e0e1a10f4cca2dc4fb1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 196976, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "d292615cdc8ca5c675ea86d58bbca71f68a2eeea18a0d232fb38c3b502950221"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 196880, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "8c6c419b2e23450f5cd2eb6bc8ffa7779cbc6c9decf39d700472e8f89f913c6a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 196976, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "9a9ee1f50ad1803391269467c0a146893a8381751f8d6da6ec61877e5a496b3b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 196880, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "d5bbd92ba885f8b6bda6dec73c7c6a22ce8d49f441f6c956ddb0efdf84a084e7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "a40eaa06f78630f1a3a2149d83115711333a15832fbddee5e982febbe8613403"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "e80771304d642a1225c5df4955a5e7596ae144e64eb745b07f80da71c5007d62"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "67c37cb4bb1f6800b6d20a1c59f1653d1928ee0a2f1539de1afbf7e29277df02"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "2a03fe794183d24c9117d6068c243ce94fb6119f9e94da744f6ecd67023bd502"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "1171f925ff814567ff2157bec9e5270fef7a37ff4a95d7e5025318518e503cad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "906426173d2cc051f3c70083d0e6ca05577641037c9d74e1322ee1036c1de6dd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "c30e08bf7d1f0407e6796a9d1111fe0642b4b2247865dd921b8b6b7f37dd3f57"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "6716bba5a0c937169e903f9765f7a7aa73a44c33fa1ef2c548edf3eee4b5bc81"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "f0dc9c15042afdba477988f613044a706d94b939c3c029eaaf9e4061913d1330"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "0febda47da47a9936b49b054be0bc9044fb9765b27b1aa988cda7c90a33c7beb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "998cb572b7064ecf07bdac8566dad6e2d514b8ab0bd88313ef078fcc4cd85831"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "b6d45396fd8a69e5a04cc845cb4a4e06916863a9da861f45c75fc15ed8b295ec"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "e5f9519022d1938f606bd463e47b9bb758810743d138b92ec46abcf71a3b5933"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "15d33cf1da6868f98b99e2601d21b1c5037d29e44e2786f02c17fb7260083e61"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "47f865fafd48f7ddcad680f16d0fc6d6b0dd32bedab6b37c4fba8ce37605e2e5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "b8736c6c6d0a93b150cd195327a7f6772b84cdae15771e17f3ad05f731d5246b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "7a285379f339cbe3e01d9ce258824b4f244ebddf969e4275b8759840154a5357"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "0f3f268aeb15a8cc43594bdb88eefa4f1b8aa283745d7a51e21a94242787e9c4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "aaa78d44f14c102587c7b257cf8fcd4e8e41ac3a226a86b00015dca9c6816a15"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "519a47a3825324cd6e006a474f5889c7017486b9959eb1304d54c9f9064aca27"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "98c17c0be5f23d0f1c27ee8df0986c4a5c68f4c7d5f4620b6cb2c84aacbdb24d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "e642bea2782c77c662cab5c8d6ccd3412718950f28ddb55f08dd44b2a14f2ea2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "bed6c7c7a66d3744e54a8f0f272a54f5e3567e4ddd9b925798e3aa135b0f1ee3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "c4a384a0c62ff179d03dc1ed6495e41a4687d4e4a7031d1aaea6d3fd685c8351"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "ac10531e70d0814a5c458dafed399505c8ceae39784fd68034e0d106aca2c17e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "fa5f084643e9e782bb046e1d8559be786863d7596db2151d11943a8751c9e4ff"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "c32d566fe519a7287a93e078b02912a6dcc325d50b652adb0d33ab61eea8dd67"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "a721ff8eb3e22a6133ac8c91a1f4fbf980331e0bd0e87b35eb48972bf41f75e4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "57cba0dc4898a5c4a3ae85f586f64f8404130ace0e1c6335ac9d682557733de8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "8c3a66b06a5329b33ea355122b66e91efab892c18ee8a0b18ea8755855863b8c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "dd2bbcee096bd06449a68b94d80acbccd0868d89d241903814e8706e040ee870"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "89b12813cc5b25dd6e50978578ebbaa5b87990f9b1243948bc0a639fe5cef7d5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "f5e2c66207bc91fbf2a6c0a06b0c92d0327ec0111400f4cfcfca63b97cd4e90a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "140407bb9f6cd581f5e059a79547d5e565d2ca9264c576c3e308cab25c2ae402"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "141c30ffdc869039f9c1564fb949939d929eb8b97108491ced2cf346bde45ba3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "63dd35cad147954042db60f37507bc24a06ede28f11a526f6ecb48d3c5f71285"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "a67b82a1e11de9e6ca722de2dbab648668ecd02c846e17591bac7e1ff0852b19"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "8030d8694ba460fea114c59ace52dda490d333f5691bf986716f3b231ddf8fbd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "0e579e92c21f2763023b1cd11dbf2d49d5bd48b4ce5c166d527f31a475930923"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "d4dc428a0395f7f9c16ffc03851983d445f7d169445a9fb6eadc39880229afaf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "bd19173b9988812e94523ef5273efc141ff1263d4e4acfddc8ed6edf52ba789d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "105b9119a6d39f6f29d04b8660592a0b2c7df840c13a63a2eb0894bafa746345"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "27a1fb6ee00cdd88a8232afc76722fe00c85aca06d9f7ce07885f5c471fcb7ac"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "ff0363b9a28e5eb8b6680b059d9d80ec0a6f2b65008ac3b300c7750562516bc7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "adf80699320904693db008572192892b00b4e565310cd72cefe1f822549adf63"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "8adf69ed92894b0df8cbb6c223104fcceb6f3931ff4ff50027f5cecad6e309a5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "1b978edc15d0affabee9294799cda0f1e0e5813d31a02bc6a4b5c0701787baf9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "a964d04a796d948cb64ba9abf74d16ed46a71843f93ea27a593dab1c52e51f10"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "33a8610c7199cf7b01513ad7eee48509df88fd59d7176ea528cf0a666c6c2714"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "98f9303a93334b13ac8f2eb70b5d02abc97de8d9dadc7c73ffdabc972b55b311"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "c3ad3a0a64714bf27bcbc3cf06ef3ba81b770c0d2276687e257ce460df735d45"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "d2dbef61900352c35782e3eba04ebcb21480ec5c327040c241ea7135cac28593"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "e0f15174b6c9ebeae8e1c093cf0aeb1806e1ba8cf6febf7b6a327c633b1ae22f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "63f3dca19fa7ad0cad2f37f792a8798eb1cbb674084c2780999ca0983dc866e2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "a582cd6a95a6ad54c8c6a2b0cc07bd2ea53c13041a5467f8ef5c5547f91de76e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "f5cf0ee1075e8489db90e9deb4ff828fc2d42b51d59074de7df832860755b93b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "5022a5e5d82f9af85f76108892b06c9bda49d146411b7f631466235cd4b94f38"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "13cf97ea1901daacad7b0b5d66eba5ad6f4e582f9973382da89182ede06b71ce"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "195a30f2ee4a8a9042a934c07f68d3c3cc6c332c0a355a52d77a9981242210b6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "e0f510d5523cd34c502060b3cd5ff4123de5f88f691dc142771acffd0ff37fa6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "ada9c0650d61777abac01e3a1c4df75207d982d71ca3555905f6c7b91918afa3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "8a80c654f2a4b8fc568e7b103c0327d57bf6c48e05faf737cd8fcfcee83f88af"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "ee5ee522a10bb88485270314c8650173d9a02d6da6cd941a80be8c1d8ec18485"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "f3a1c5ee77d5227ef8a7eba1d8c5dba07d7e36e017df5eff6b3ff8aeb729b757"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "60730cb5191322221f2b1df6deb7581b321bdefa8eafcdfa5b968aba63c2c25a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "e3e592d0a5b574458dd5db557145edc8466518081eae2daa6a3345fe0de52780"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "58566334916662c6cbce6fec436d56ae63e3af932c795663d8ccf15a2f1f384e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "b61e3047275697f9b97b70a6fe8fb0341d7fc0fca798ebd3a2ccd71ce7461794"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "001c941d57578019c67b35215c59522e2aa55efd620dd747e07ee10195221fef"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "ae5a85d44ffd165690933789ea7fd3e4c0a63848348931ec5cb36c396cae2255"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "3e6ac6585a4426130ab98a317f299f7dfc90906b3a59139b1304734a7ab284cb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "6e5bb2b325d62afb6183436df2cb3910c80ed186fe612622f203a2b43803bf1a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "8cc47cbf9e71db4c4fb924fe884faa4a6fcc5188db283425c84884572ee48c18"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "61394da80e42d5a14c9520cb39ee65920e2977e3c8de5491e362f05248b364a0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "8b0dd046a77186444818a912532ccca1a042378ef36c0a05efb320660a729d2a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "c5e524d684ecbc58d89e8e0abcdb64e78e552e516b5a9f31081262beaa024e3b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "4e8b3c22cc49034a3cde273d331a7d4d7096f97e5eb4e879b6a4cc3f8ad5c267"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "e84628a865c2c9e1e6b0f875781f4ed57569ed51a987652d17b4977609afe068"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "1da6162f6a3831bbf96ee4845c6f16ae000eac58d95ac24814c0028a7bb91e9b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "7c2b64bc1fcef54f4b3f156b871f5b878a4e7f1550aa6c37a996abe833be71ca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "25b1fbf069ff7ce4ee9cb86d952252173bf1c9424f02ffd910fd78c7eee88569"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "cdf9181666c0db4d9352fff41cb87ce2e06e49c6d51c180d677de8515e0e1d26"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "c9101831d95dbb0c3e9270ec693de66568cb2ad53012320d11af5ae5e84e2d77"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "733353b2bfbd456c5b747fb66a30a0e67a34a7c398845406e38b39698dfa3c3b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "6074877c5f18f7413b629558c7b67a7faf8c385ca4d21fbe1ea88ec7ea58c8c8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "ac836096aad864dfb30f13ceacaf7dcd3587d1915e7122febd68bf1d887209db"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "6d46f8e5738062fc6eeae786d36877b4dc61d9de1ab0b1d2f4d86defed768af5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "2a6b6fb8229fa1647b6acf3ee65529f8ced5dc996183b505974e03e8a59bf42e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "564fe10ba06774c845d8978dfc67139d87d4746b0ac01b2f8d7ea4aa2c57c25c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "f9ab8c359c0999bbd89d69e8902cd4c4f5ed1e74af80e943d6bd06fa76415ccf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 115024, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "e2d4c5b743db28d580ed2259f868e6d585163681b7a81a323062e88757887d89"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 114928, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "a46b2c89779f23b740530c81dfce09edf7b09a687789e81a784b41096b1d1abf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 115024, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "2ca4924a0a9e4219879de3ddab71970313610d3a3f11f3762cfe421a51c79904"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 114928, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "d9c5f1c77a9c4fe0bb9868ca8b5ccbd4cbc5b78ec1ddb1f2426de0d150f137dc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "e80174a7c37597b8828916e384e0b8ec69d47298cd81dd251a47e18e3e42701b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 115792, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "3f5ddc02da1672063814afa76bc7215eb7444654cc25485990a2b3558dc3038c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "706d6045f152f240abd6c4875d222473d5421ed9b62e17bc07b5298f32d42e45"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 115792, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "f50dc210947181ade8c3f823b558408c0c32b1ae1448b281a8d6e32dd18073da"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "30a61b38482f9596e916e8dfc581c081751a4f1e4dcb28a5aa3e7c5a5dcabbcc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 115792, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "255e4008f20add28dacecb5ebd04659fc9d95b848a7ea31b982178d57dac7b50"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "66cad65f7f7fa11613a441182109a8303bce072a699e2081ad4871b6bf142e18"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 115792, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "821b064478bfeebeac047bb76db74a4e079cf29f9c5bb0a980745c97a90836c2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 115024, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "40b870d7e49a6b0cb45dd5d563703ec98441f2596b45e70792657cc9cb29d567"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 114928, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "5d474fe6423d1894656088d7a3bed29198f86b95225a6776e9505e406c4e4f08"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 115024, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "7a265a1437bb05148f3e097dab70f013a7306ff7db18615f9f31a2dd56995243"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 114928, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "586809a6ad4b344b5cea809d4e7116e1b151533970df5ba04804f202033b1032"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "8044c8bcec20966d923ad888f9f5c6ea07dc45f88ca0e17efe6498fdabd3da3b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "fee581628287e163d5e11a752df0c35047c329054413f571ae416c8fe6254399"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "262e6d192903d49797d42fda56f01e5c0591d6c99bb9cb89d14a22470ead80ca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "5dd695e08399d75e5e34c78c73d7e7b0adc88ebdac5f1a70909804a221324847"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "79bafc543def9c06590f70f6274500edb4d6bf7e1e1a1aa7cd199fb548d99be3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "251c473372513a4d7514956da751082794a43b62c12c3f4aa4ffd90ff2679932"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "0fe0db65d3a01dc194e1e828a8c3d6f4d1f5835c8965473ad341d8af220f5bfa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "f4c005f51217c7102727cc09516194dce40e0e3e95d59d48e51280ba14b27f50"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "19f5a9280f3991c94b882129a246c5f69e8eb7be48ddedf6c90399d308d41bcd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "8d12918db9ab4123bb326dca7d86d681f67f27bdd395a0709eb6808090e68773"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "e3185eb461480f61f0e6f31d06d69ee23ae84c7ab78d5a738a6ce76ee7ed6145"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "9c12c22700ff506e4c26410f9ec87ac451410f066798d25000a024eadca6359e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "967f982e7a7c40e44687a58b043e2838b3f041e62d639257e6f2efa10ff01957"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "1707255c67f471f1f4727753586f997ed99e46fc197f348a23da43a6259d7660"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "b79e6e3b36d58c177b56a936afff555054c5f06ab98c394af5b9bc6e6c3a64c8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "aeebea982677fcb17e78345e1c91cf97c2f1b646531ab7a3ba1027c98770c035"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "b5c43ae154c8091e4c2856d4b99141a8e522587f7bf5abf013d0ae3fb4132231"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "438eb76d103027a0ec8ece8535d180d1d10cbf00a7dec9c9e7cb53aa7cc93470"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "94ce4a77a97548b79d0e80f82dcb9307ce13ceac6a5e3cf7911af1088052c504"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "8d5079c8358ce3143f5bdd1934a86822c711a8029698996eaf5124fd9e3f43b4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "f87695209461612068c053ff59c0cfc98058fca7c70585530b03940d2030b0a6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "fedf414012276d826272350bc7fe725b5aa512cf8f26aa4428c8732add0cdcd5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "14c3044ee4ac30b8b40ae0365ec926fb9d66bad24e53557d09c5105df2666cbd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "90a09872c3296cb3dc1b2426573bb9b3c817a333ffa866dd4a67cd29fe6f1656"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "40aad9e7d3d06d3c2ebdb0475982087ee5e5d5f52cf34e626e763081bfdf124d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "01e15dc68bd48dc7dea2f333499cd8d3f560b6aa944f4dbb3f8e4665576cc63a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "66bb1b59e3bffc2a018533a90cfeae6a923b8d91c3496dfcf1b80767945f1385"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "43304e7a9615fc0aa0165beb27de04edb5de2eec5107c5591bd79499ec8cdd08"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "70d598807461b5a8d5eab5e3d853c19070da0c1616d282464e9affd1c94a687b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "663df8c50bd6b2da40e1e45ab41c1d46a4fe64265c159c1fbcbc0cc8782dc8bb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "8a27595871ee7b67214b59f9388b295c0e0ff1763d8fdb0eecde897472670754"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "49e0dff1e47f2d0cc84165b7077a8ac7063687d061de0f8de9aca82966e7b4c2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "e80de082476d606d81b418a3454e7a1ae87c95ac9b9639526b096ab5544fc78a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "f83cfcc4a561400c10af805660794419884257ab66ad306993807f36dfe4a66e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "0949d162e001fd906197dd990131de622284e18d2b476a314c4baa4642336655"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "91f982585499bcadd4a771038eecdcb9e35bf1a6bba6857088cd06618793df54"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "1ea718fe613cb85abecb45dadb1f93068b9f3d1973473ae8cf54ace63d815df8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "347333e724ca08d01605a4d93a17478a3184aebc1b1059387c22a23975d996c7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "4a014296b79a3bb41ae37dbcbca632410935cd5da223551a32ce13300ac07624"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "155c09b9bf16039df09211b779f649438f0b3c8482cee12033f3af6be3958f7e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "a0b9a3325ff42b73f2509ee123a30701a99dfc3f62bd3b790f9dadd565f577be"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "4fd880cf4eec162f42e129dd0c682ae024bb16d3db8fb3ce13a12eb55b11d3d4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "9014f8331886489acf1fbe50af02a59ff42b11bda1e49d67fde68320fc04d586"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "6761a7ca0509fbdf54fecc2e60ed5f250e21d640c8bc19fa539953027133d678"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "906505e961a7b7f245ec21f443e835fdd45ab0c782b1cb7712d4e34f4ec2bf68"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "953ab77f34afebadb1906d737a0f6caac45fc34c79b800a4540bd527fa755736"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "e6774eef81695bd7c7efd1aa65f1128c733cfdca3611f5a2da18a1759ca12071"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "72cef07aa9c6bab565862903be8553dccc49091b1319a4a7239851763d6f8c1b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "d896087ae749793f9c33c301a921a1da66d164051e920b28cbd6e126df34312a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "34c6bdf89db72b8886c843451cd29d6ef35605548dfbf54395aa14e6d802b731"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "4cbcf1320828ee8325f0d505b17dbfde64a8c6851180698c99abf227e74e94a1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "79bb8b1cef6bf505d58c773db09258a90376e240eef8249f4eb00d91808da4a6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "a0e3006369c447db59af2889376836f1b76a6199bfb10061211178cf284734c6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "fa7cc20711e5b338f97dbde4253d8453e177184fbc26b2590c24afa19fe9556d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "04da344d44535a43774ac64bae37a86caaca52e09440010867acd20244c1ed7e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "129d758e6b93d1c2775b74ea6f4dca32adf02f92d6d82a4ebd0de8cded2c217d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "89a4211af07c1230ab5b5194e617779697b42f6c7c77406f9b1f668ab71b9be2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "3d722aea3c2ef47fae0a1633376c38279a84c59b5455bdbc91eea849926cc965"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "b4394ccccdee540ea0b68ffed3e8742b8fd6f7eee9d7f2d99e7983928d717adc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "c0c43a39967c9f411f6b1b4d37f86255cfcf916edc09eaa00b9a49179f93ea12"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "8eb09abccabfae3a5ade44819c9c11943316014cd607587ad7e2133e4c936cca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "9e7797bfe5bbdd2a3117585b662b33ab6869538e75e39a68aa7114f4bf42d083"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "a67d7e2537ccad683da2daac02ddcbd45dd146aaa51ca9a7274d21e089721819"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "44c9deef7fc81dc1cc01e96c12ba3fa770e7f3d90cc1df8ab9f5cfafca156052"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "36854f2531416fbf09216e7531cfbaa474e4d88cb7fa959c88447610e6ec1cd8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "3debf26892b26685433a89247105fd6135c7e76de95c547af4aa9f989e716525"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "754a4c6f5af3d303a673e934d322449601a409577e31a70848aa583abd12fb4f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "d38d0fd1679c94788d92f9b219adc2db307d6d6fc787d5bfdf904ec61e5b62af"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "738f5260b86c65a500a8f8839b8bc7794784f53f02d0c050e640ce60872812e6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "24f9b5ab713feeee216736b42656f532712306e765d024d7df442860ac07bd8e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "9a00897edf6ab7836bfc4322eeb43bc342bf640cca72d1eda28d8da7d61fe714"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "c391269451c5e0463b8f72127d152e12148861120d6037018c7536061305fb47"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "2ff4a76d58a52448ecee4c2afbe14b6ba6ae61d48230132dcfab307255760462"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "7e693877ac3ceeb8a6d3545f87375b9dd50f9096181fe997a313ac1785179c1b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "f7bad589ef77a704a2175acf62b602f49d2f5e21b5fd0c191df013196822c365"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "4dd3ecd0727b3d4dcd7dbcf0831945ffb46e97e89a02f21f55cf980af3c0028d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "d19b6780cf8dbb1292dc923f7e86275c35b9abcfaa2db0b08ede942b5589dc6d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "3912bf601ba8cec1670f9afa277668e824a3e5742349d45adc4e7afd7977f7e2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "a519e1631047fbe106f2fc7bd17021c288653a19ed88c0d5aa41323d4e0e32ea"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "e53ff21db4e1ec18f7d01f32e5459f1d3839e226b27238c80e10127d171bafc8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "cfd3af0919ffda566b6e8a07098961ce85fdcaf6e326c146135a590ec9d03fd5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "2dfde347df9303ca4b8dc2ce8c629d8dc605a4da7547c7e9c38ddc92a2879a76"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "35986e15e0a842673f993d77d25cfdb9c150a3a4d88f8e94c214f0faac91df1d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "72056d6cb1857b8c581ba70b767ae49fecafbcddaf03705e3c1292bd280db3ff"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "c3b19fb0c497047e789bbbb6fdabfc7cd984cd803e6a2801e074264b0807ce66"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "4a22a95a186123fce4e8cb7f30f4d090acbb5f75130346692e5807112960a125"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "771f23d3e99700564a65374546147bd83b1ef2ad99ca2801879956680a4d7df7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "d7823bc6574099ebe104cfb44a8a0b4d109b744cd191b8222c762f333ea621b6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "0b75f00dd1a02f788610415b2468275b1dc379a6cfc754de8607772f0a287c2d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "def2bdc523b560e82e8ac734315886cb5f5327d2de69b96c63af7f762028905b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "d82be988c67a80856b8ab11e0b99c8719dc5bc8aa2251c57b20b13986318f8db"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "5bbe2d658d89d2de478da7dff716e9d68474ed290b64e4ca3d0b87d2aa52f1ce"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "eb1f25b26224c238c2eab49a36830d43b78044d88337996ac9d4b6471d3c8424"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "062727102275fdc44071f0e468a17a7780e68c1491310d862875cb98d16572cc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "4cf581b7ff7fb2f542bcfa65153f6f53628acb118eb08c16f6485fd4369f4d53"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "b423c7581db826bfdc96df7b1821e178fa86239bfa90808c6cb5389388e93b6f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "f799da0e19b421b3c069bbdf148fda023230d88ad1bf38ac3321d41d99b76d73"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "e98d9074f87b1cede00b1df62db217c99ee6d1a72183e0d88702bae5dd7dd44e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "2fb60e4bb9948201dbff8e72a3b400eb5e66242d83241c1b0013a6433c6b8ae8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "055d14d14c022063e507e428b26d23369300d7bb5bc59b62a9a7ecf504ebc519"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "0d076ee967056d16d9b7ce38304063ceb56688518412a30f8417aabf778747a6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "574689591c257b9f20832b41cba81d35006602c34d1bf100d0c915f8f85702f4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "44552d13dae237bced12e2884f22d55f5bf42638fd2d94f5ee04d80a823328e4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "c3afaaaa6fdff1e922c2395acd6d586f66cdf54fa6fc1b18d1a50fd2137e93b6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "c71cfa081c8955b62bbaf22fd6aa52a72ef497ef07272f55a1f3166e78e6c852"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "ca197f39b788ff69df4c6cd4b95d0041e4d5e68db2fb0fdda5378efaea7d3f2c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "3da6a5ff49271de781ebc5b958816f7dc5728bbc3765317490471b0c68f65314"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "4dfa0158d95e4e9f1cbe38e525e57b33e45b10dfe138722cc568ad457ac18d7f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "c4cbb2a455f0e68bdf0bd6e2bd7525d59a6896445364e217ceb519bf7229b402"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "4cbc9ecf5ecedbefb106da7d37d9005b71dcdcd3c854387135039a7d29131aae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "2193902c518246427e57982bacec718bf9545fa6ab575abf081a2115ba4fcbce"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "0289a537d141625248e81b4998298af22d83f25c1bac3d61a9a7c3fddf65aa5b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "f9985bd641faa4cdb4eb225678d0f6daf0a3ddf3c7fb2c43a98b8fd17f98c012"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "d8839322db76ba374c2304dc5ce8b2945a9d08778a70369752e823e010ee4fb3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "df8a8bae52a00a9db9947d3234bc87dbd38568b9fddda8a166171f7ea30bba73"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "078cbebc125eed135ec1d7807cd0b7c2a139fac623299b1afe6789fba97a358f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "eeb105a50979980b07045d7c2605ca2d3126861c3a334c4959fde6b6c29fec7a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "a3987cc9f708fe9f599b88ecfaa11042a59fc62b9926bcfc9fdd4a93d4b172bd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "06bc9871f804392cb60caa8d3ebb355bfdba450c4bebf2b0ea2931945565f8a6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "6dc8482108352b9e39012db151a8d34db2cb2ec0bdaba72d6dbd6711177a6037"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "1755a7751d6d84da34fa08c09269831d1783e66e79d8cba7a3e4f824729292d3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "0787bf7fc3130e7f2b34c98520d13328848f01f3f99a5218e3757f30554f8e57"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "7864289052c4f00802cd271528ca2a7e73d81f380c8e09ce6df175f82b3a449c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "b48974df94028764fadd235803cb58a92ace4a57d6168c1bfcf470aa4c86f538"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "159410f9f5ec89e96b1d766e327e63ef224ac2b31f3ba7a97db337ae714a19ae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "a00e683bc1e8a58a0938493e44da605d0190ceba860cfd94b0eff5d9035beff5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "4db64a4dd48fc17a8f8f837e367e1b962acb7ccc11502bf4aaf4bc5f03f127ed"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "a866d9364a794804f700ca8bb41b6b67c78ee90dfea2d9491d046ff14931cbc2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "ac02f2b1527fd8b95396c31e6e582428b9cc4cdd918f1753134b99fa79be72c5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "e6c9db7ad54b52cc9365d1de99dd98b7213a0ed2360dd1153350b6123aa479f0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "04fb2f85028743f767d8c254e7137b690b7cfec6bac05dbdde55f5583011339c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "323bf9fd03dfaff91ae5772b034ec59d215ce22a0358e30cc62795be5476cd3c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "cad4d79f73eca2a66d0090af41d488b1989fa69e83b347f18eb47fe2e749f08e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "77b80ba112e409564e64555bf4c8696d4f3fe295017b8642b91797bcfd1c5cb5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "979b29e8e4b888b840fa7adb86c7c7551dde0f7db72d1d57407038b93119e882"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "cccf51767a1d8e9d6b5608824b1fa084c2aa0c7ecc8caf00e2e5c435a5413851"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "285fbc696ca052f197641a362adce3839bd8dcdb2e31113d344b8445621d297f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "ff5ec4a489e200ba4146f528be3651abda0a33f3ae1c89e9b298838be912eb6b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "d71a28e3287902b2bfb0cf924db78c87971cd9912f7ee7e3057387a15d4e7f73"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "b9d399d9522c1e58f580cfdd711bf3b0a482c5f949709ba39d41f3b9f40657bb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "8c91cc248ef9c5b6036f479d0fd42793ccd7e8d208622f6bb29c42ef3c583e03"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "b0129242b90f1b7ed7fa4dbc83b21a142c6a02f513f911f10a7a36d43280b4e7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "21376c2a34448920a44a362c474660a5d9131b62594b685a47b7a017a2a6e871"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "bb74839d310f072bd27d0cb95cf3b80946544ff9a6fc08a5bad66c2617865373"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "9d98eccda1b9c681cb09d83ae6516c65c6789792ddc22e9232200be07680cb92"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "29d48e5d8cad6c32c4cd67d62eaba70646ce3569a24a13fea9404ea676f6e3f6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "407f1d3f425f1efa095c42d80b85e219ec779c3b3156bd833ab6efaba491bdca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "a6659a12a720bd00987a195275f6ec60ae8abb2a0bf6749536d7b2ea046214e6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "6171d5e628b0fa16ac8cee50a95b78eec75b410d1aaa1a2d70f1d1f7bd832f38"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "ad05012598cb1c246138480fe59a2295a5fa2cf5eecc8db6b908a11246636fe9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "5f8eaba758b7612f1668887d28426c8d2a50d9e7ed5a37a294ec728b84214fa0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "cfb8f324d671ee0b1b7067389155df5e38d8a0aabf79e907630c637c557451ba"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "52a1f7697d0e3a7f051618c11cb692604297ab1abbe660d84baa4422014cdb13"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "f1fb4f18ca44afd5c601cf3b2888354e8525ce3e92c357075a143ef27b300ad1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "092ea907a59a0cebec8a15ef84e37e5552c7feccdad8a1158c544afd2c746121"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "169596d4980e1a6153f6c65e303ad8485f88d7156c361b96833176665eeaf358"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "aba2d2fbf4159064c4993421357c1b199c3a0729e4385d4b13ed705fc0db8616"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "c6431eb4d508f4980001c2e8428c2dcdce0a4916508ea145bb5bd490346a1b58"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "e31307ac69076b2e0eefb7b188ed43a87e94b822823174de0d80b3ed259fa183"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "e087873850236daff8d043b623e09074c56dbe69258e78d9ade699403240e9b7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "08a72ecca4b10ba067925b119aa8d3f77a1be1f5788c0990dfb8d0d9dcc58cf5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "97ff6e95726a3471093061d0819d7f0a694a19ef1e9715fd91920d19eff52009"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "b7013198f8b175a60a43eec37e339352bbd23388e88514f20081b75e8ceecb48"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "f4ff98704c1965c7917d09f13b895d3bee22fbae59933e4234781aa16729483e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "29f00567369ab427e79d5f44dcca173f340acc77166e8cf883cecae021692620"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "0181e9c91c7fc40314ef8a1e174b65a0763948c485dd11f325db705b8e06c54b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "7f5292e5425edff026d66ed2480e0ca1204717577861fc6f70c1248ea2049410"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "c1f3cb08d9fcde5badbb7c29a69d9ca1d23638512e20c485a00c64759e3035a5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "a475792aee60a67e49367f3b2da169d9280edbd9fe8c4a7950c7f06979ee303d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "2bfc7931429b47157f51d7e4196f250d612cee04fc830eebf8139132f75dbb92"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "f1b5341501e06f1b57b2966e568445ae7bcafb2a835e0c23d622e77bfc558406"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "b2949163a8be74af4085fba9ca31b78f2e54afee946a6dc4d615955477b14345"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "b8ef5a4a20df39225d951d2ae663d0b20348a0fba2d0e3e3cd4d026265a730b0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "6570f7089166cc8cfafc15d2cfba38ba055c3f9dc0ee8c486d73d06ce6daed11"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "ec782c13c6491ddb95ad37684f2836ea71a519941c307b765b716073d1e29df8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "b31fae74e1e374ae360233b4c5e1ffcd2ac7a32bb7794e664b1c6c09ffc45ed3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "196ce1287aca0dc18d53ded3c95126587e2a3482d26822d0c1f677a4a46939f0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "d89c81cecf1d7f7131cc52ccf95b0e71a48ee34df8eeaacd77071cd8791232bf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "3cbd81ddf39ad353111b20ff0b761a97e0ca06053167eb1905ab444359143e2d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "a6d5f0bd66656096a2ab0b28c9a0c9c4988aad77975a24bc2a0e949c70a4b951"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "69e651fc887a4e4b0c84f53bc9f3e28f85f5ed5b1cdc773caae8fd307eac585f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "b32efe36ab2420861ece8b383601aadc8319a09e4a5e074df89b6b4cf0c4e185"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "3e0fbec30d64563e4c7545df6aea9b63f9d03243ea3c9b28d8c595b3543ad18e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "09295c5c626750a02931daea3f684fc6483292a36677dc603a531c6d13aede8b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "93c4750ee8240ac088edfea91eaf8219043fcf06ce97c02e67cfaad239cfa591"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "c582122df470a4dcf0736402ff115003f440d1179739317f2eba5173c968b25d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "bf4c676b9cc2da3033bc7dfbab9f27321bde9f19f211edb3690240c18c55b134"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "3c0bf524fca9acb985101a6cd5ad3bf945d3d120d08f8806cb87c1f9eebb6768"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "ed13dae1f9d3fa88d632ba591eee7982b06d86e2efeb881451632a63a56e96e1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "54c24edec010db9a29c6ad388adc1385f0843d8e58be61ee74647f1585d0a674"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "3ddceb1f4a2463b36e74a8b0aa4711fe6ba43b7def75703715aa4efbde34455b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "ccc1fd72f04fd94e9274977268837e3bbe6d41532cbf6af78bd0e209a8f77b05"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "e299e28b18252355f1d74f4b708fc81bcf9cfe74a938a6093e32fd9cc2aa4058"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "7ebab595b9a2f51035238f2bd35eb38cefa1fa2c14bddf66a9400df7dbb40d34"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "9024357e583110c9e59d51f64bf7d611fece78fe8ce992fa6deb22ee8575f6f1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "1384d3f8d10d72589ab5582ce8552910accdf2aafc4fa63aa8ca46444de3f1f0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "0b3aaeeae498a37a06a4b43365dee5fb3921d7944a0f6e82608863be592f1f55"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "56f02693e3e09a03ea04a19c6c2eb48c8f084096039e1b0660211b7fb824106e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "c557dec133913e8f8ec80e7f56a35ee568f8791ca1414a5eb8202760cc406dd0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "0b8fc66820a91ffbabd960f413fbc3a18434c3ebcb68e2a0a8073233167df76f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "e8afa86b932ae1a631815cc4ed2846e6d01ae8a6b12b7f1996602a415c5e3503"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "5d6c5af95b7fc9fba7df777c98df08feef14fed8cc42a97224e8a8b2ef563c7a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "dfc9ec1714ef296d663e0f6302db4c0ef0f95664d6bde2860b409629862eaddf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "250cdb241d63819e8f39a8822072370e8c766ae4bd28c6d262ef125fb39442cb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "062f21f889d45b909c3ab2731ce4d6f1039a4bbeb1d9dfa5b8bc4e3e25ac1f95"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "d3d066e2987131628ca51b2824656d9c866394d4edeea9ba9ceed43d2f81d3a5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "d356f788ccd560ead3f24b9ff2a3e828f33c26daac80eafda21f58f939bd5ed6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "831ae9812185394ef22d108d82a9935b4e37f6ed2f38c59675f3adc4ecd64477"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "c662109499412582c76086fff69e81ef865e38b3f866080ff5cd42fd96c4488d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "bb55b6755d1d061e59d3569442606e1a0b43a563d72acf4e05dae5d5fe549924"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "625f2d233c232a26d331f43fbff39166645cb7941b2cfc8b3c98ef91ff8db2a1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "e4911c8cf09220cef93e5558381afe78826343114de94fdaf623f23d9a0b704e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "8d2b669ad144a2b4331f73f1b9d934e36eb1a977ae1e1755c0cfc269e5e5c5cd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "df7781bd2868244c8760c3f20c2080904522ed02c0aff795082413fc81117b56"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "adb844751a48a66a3ff8b93e03983febf8aaa57f55dba7bf8ce19f9f098af354"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "d11e6767055dd16f699569dea8ecc85bd48452e50bc23c215b43afc3fd775ec8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "fd58654bef3aea0c996e64dd065efa34e5e75cc6462a508380dc242fcb827d1d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "d29ef3b779c8d1617afaa8a7021e7fe5043f64caac6dd5374c44d3f6b2f3ab8b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "d85bb00d8544583dccf37c9dda6c8cd72f91a7697ae346809182b45e68d998b6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "0b6a98116b7df03e50a4653a998e00fdef0435346b7783e637034db1751faf2c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "f6e09dff4930354d832b6dee15d6326ad376df8c86069229a9d119d24d207b9e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "f610d4fa2d7d484a1bdb10cb93db37a42baa01f8037f2a8e6c63fa2ef3cfb90f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "1f2f8951d209167596282d46e0f9acd8d93474f696d9a6bf066e3f43b87c5ecb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "303e2790cf9be64435bde401979c607a827079f8cea4ab916dee10c6c963d7d7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "9cda9959e0f7157514646b9b70159ac909019ee597dd240f1509496074df686e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "f8708e50dc023c3414251f866b9958c777176c1696e2197ff91008132e3dbc07"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "e0209b09861bbb789c959d26c4dba3857675dcd8b86318025c7b23538a4acb2c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "8dd42f8bd523e6fd6ce81f835e7c0398cd4221adba08a7176a12fe9a9198068d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "8b2b3f1acdb1e06da8747eeaa9487ad60d926dcfd35b424b1a517968854be71b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "ebfd288df1ab3fcd6ca69cffd2b1f9151cd26b731792322c1b6805ccafc38708"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "b16c2816948c50f357e8072123ebe113257b6074ee228fbb4631866b0d85dbeb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "02ae7aaa86e5bd1dfb5b1266a675ddfea2890be4ea1a2e5d96c94e350999a0c3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "d9ecbb2d6177371d554ca9952f208a150617451ec53afc9a778b6178dd9d825b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "e9958d552e46b4cd634a51d24e8c634004b741599aa8166e0cf9de3d9c4fd3cc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "bfc0fc4dc917009e6f4ffedc82da0ff7974dfc5ed485991bea71d9523e73f8b1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "a8058f556f559292f0bc15fce2553da85487be813e54a342188942c45dd015dc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "f695a4f942053149473a1b8cb1ad64cba44e1ca7ec2975e8a3ce3c09ac23ae45"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "7f4e12c12115710c038ade4bb0f7af041df34ae1de52fee07604b8124059b39a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "2fe67635025933903867f0081c46b548c31b47b2e87b12092dc61a083d3b0a91"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "cae48572366e2f85b78a18fd043241e1dcdb86a578c81bbdb0094188e1667bae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "1313f74c3ec636c03f95e44ea0f2abb0e9df4f7035c0b8c33d01d7abd3df0a6b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "80d47f6c3ce5d76b107c6f01b3d4cb1b85fbae57984a4a0db97f13fafe3f25e4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "10ff5a801e11ed5956f317b4ae080d175c0e1a47911b8e4c31000898c826a853"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "bfbb7fb201930aa9e6008d46168357ae4027cda45e98f80018f2b273638c1642"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "0f2e31f925c9e92681f3ba8056356fd1c0b0bca51b0b78e34c7b80729dccf0ab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "78de18db90273faf8c3876418510309e9a6c7141ba3554702572aeb3c0d7f9fa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "6407382bbd06232398ff3a0335c806cf03d31d000a57a8736450539a5e817677"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "2115c78ef48720f6012e8b570cd80e7eeaa5663e1e3067f7964a921251ea62eb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "1f27ed82ae1f5813538b9fed226d4ebf8177d4949889d48fac28e7091b019793"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "1b8c11888c47f4dc7a7372a9c1b3158a39cedbb991362c4707a0dd22e8fc389a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "de277f49fae921cba1778ba5a3193e8dc679dc9a917ded2ac0ea3f5c938584f1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "305a37be7b585c56fa18bf26f301de46cb3016bd027caa63450eb6ba80951013"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "0783c2cae9e75220c37bb28c0eabec678038b8b408ab4e4da8d98ab4c6265596"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "7f11dc006fccc7d4bbed2cd62986a78caa51f2427cd4ac791950268f470fd69e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "1afbb53ed426522af644b9c59a2537df16a8f3c81f8afce8151aa5fbace6596f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "f7f5eaa7ea7e8c49043f4d94ee332725b73e372639ac2fa9fe57a2244f10f840"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "404aef05249b75264da4ba04a82eda4e7680ac6fb70258258ff2e13161bd4771"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "4e0687c9a4025772bdf52c016a59076d9594b23588a9e75cb2c00fdc1cabb0c7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "5720403c59147e7df7c6751386c27d3fad698e5a5f5c042490d92a8380e103e1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "2420e4aab86b1491c994a8c4d443e4b175a43eae251e63acd260efca48e6769e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "938c7d1227152f0ce0da0031234890e07ba25cc043dec2aec955d35a356231af"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "800469be8ebf45b28a24d4e485d8354e07b328ab1a21dafcb9225c9a18141483"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "ece098eac7527842d642509a466fe2ebd5472d62dae302a83ae1b2b06a322b75"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "162278b4bcbf791563fc38b633e5e0d33941c62bab89cdbe1711792d07de7aba"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "cf6a6392db1e25c15f133eab8fabf37d9940860bf48a8ed885dc2a2ba1b9d1e3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "54cc8a3a7bbeaebc7e533de7262444c3e0f56288751167d00f7ef592a25466f5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "9a7279860faf4abe3f36ab5de469044b3310620cc4fc5643b5fc17e359bf4564"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "2fed2e427ea1daa1bf45fa7333ee9f7a61b74439c2878de6af92fc42825b838f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "c8f3181de18f668caa7f3c4bd9297d23f8fc53ff1d42ad9a768315e1202f70f6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "5d0ee235e3c60616bbeee79a72bf650399c66e9071eaa5238f96ae0170d45bb9"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "4ca2fc92f3bfb36c0c3a1fd0eada000a7dc5b06fdeea30954de902ac02c8b23a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "84d57a11afb2b376977ea83ec902175812c854b528f0cebc34a80cca7e051aa7"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "ee1af734071aae4301382ae043ebd2c3c66a87e5638071eda02075c0c30fb79d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "ec213f30a204fc56fc51441b1fc6028428d4f793fa7dfbdf2167b05d338a7095"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "fbb155f95d14b380f7f2a10d903bc3e0e3cb48b9c7b7ad2bdb6e6a3822e93c17"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "8171fe4dd507afb419fae3007ea74c6da01f1a203312ee6fdd66b06de78d9d8f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "5753279ffbe6eb584007f77cb4cf4103e0e9123efda9217c1f7520f119b55418"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "a5cceaa7cbe54ff0c2121f72ebb995c84eeaccfd555b936d2e0d22df87b07974"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "28717e816c2abde183683cc18a3ac893f5cd7da996a0ca91332480fa6ecff6c7"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "1af85ee4e913de9664d3ddb29fbd563dffff446f07a0b556cddfbf50305347a5"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "f8a7f8f5ec9bcb8726c9134905ae2477efbe06572cc0ba840a02fb12a1ddc085"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "713b63ccb85f95c9dc4ccadc0391b62f9e33c34f3e465f4bcad3dcfb9a89572d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "1432fb56d7933dfb7401891e125b6118707651e5f1bc97c873bb1deb621de908"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "e0c645e230b6f54be5f1fe37beb84e402709cadb0390941f70adc62ea30e1be5"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "2b74c0d9ee6cf37beff8064db9fa0e7fddc71462140ff3a77e628cc92f1c0a97"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "1655a76894cdc7b1edbefc33905b473c4bc77598ef14d0f387c36e7394ff1895"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "2c9b7347870de870063b271d12018765c56db4b413a89cacfd77eb62097cf2e9"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "d7004cf759fc5ac491c621fa2332d4f2689c7bb408dc5b8a90277ef2169b6306"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "0182571806cba245a1874ba3c5126e44cdee6463cf988b3ac5a2117e542d847d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "fb61b3e3b6ddefa7c66bb3a17c609a9aec8e7115c17a87a67f6e9a0d61518ca4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "dacfb1150a1bf39d5a5f825f27f2b92b3d6ab81ad101dc13055bf2af586f4285"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "06342484ec44b0ff606d044aac636b8ddada23e93f7be8ca72d805e079e29e4c"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "75d4a0c5fc9888bd6c976bb3da9da662013b65e310ea4a23115a8af2f0686a04"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "c0c88763790f508fdb9710e1bcbb39626f4027116959cb8bbab4bf5e8e53cd73"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "bd0998514190c4b6d1518fe503228a2ed2ce394a5f585c283246f337b02f636e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "499e52ff2a59fdea43033365bccda645538a8e003128dc6fb49529bcd4005042"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "fdbd96947f762dc0011d7636543d3d6634f273fdc7da049e0dfd699c1e2e58f0"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "7b80f275a3049e8ce564368c60b0d36b12842fd3cfd21e3268328dc59a68404b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "5a35cdbc3c500640d5740ce763721e1b1af56edd3ae53f9d3170ce42e27bcd59"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "b229501cf65f8023abc6eab4d4c9da9f010463c1ff929fbe063d883693acbd34"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "9575350a4922d3ec82c03b85b67c67bd5246957c0e05e304c89498e28ae07529"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "bc02855e80925b131caba7038ea45289f99ee8dc8fb3babc5a939d9fc8d99a4d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "ed2ef0a3c9dd8a1d501f9a7a32a4ba9ddd1b79563448eaea0cca29186c4fdec4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "0f45bd7d748b132d477388ba9fb2f60f7794bbaa1b4991109d75c0a8ca1df89b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "d8c97886f968720be67f2740229b86de34a85cb591ef51bac40a4ba21d3ee700"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "31f47c9fd2d0502a4b0a134afe40d2b73c7818cc65ebd1193a88cdb1bdd41c67"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "ba96cdf4bb030894721e1f9aefda89e55cfb7b1d4005e92107abc25fe30a7daa"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "12d3e247323242ba7e58ece67693e69e43bca6db180ecedfadf3233b495dc519"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "885232e89d51b40767a8babf0aeda5763f062022d5069384a13a93fc1fd4618a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "041a7fa0c1fdd1937343015aa6486fbb5d5db605654b77004cab14c5b20439bc"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "364334c31b330721ba9594d256a267caa889bf7a79b432fda0f7fbb50380f565"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "ccd993ffcc242266bd141ff56342882264c6f52853d05198d30afbc6f649bd43"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "de46ff7e11b7bffa5b52c8b8771d9df29b4ec11d4c5eba1456d012846f827144"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "26a750031cfdb6013e14a7ad0fbb1e250a86682977aa13665e2a4a29669cdb30"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "a5622839ac4f817300054caef1ad000cd90fd56bc8cc44aa6d2a3c0ae00b9a8b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "13d7892615080f01bdf78e5283e6dcb34b5d7d78bb2efc827c8fa2f19bda837c"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "a296b5dbe157e2ca45f94ebbc9384d4ba1d3f73f1e6da3765da22498da1c099b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "075854b49ba51e853ff19ac5d237c606fdbc849f7e034bf9166cc60256f86ee9"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "1fe9989ccedc3ccfe39f001db3a327cae430051c994c25b9afb020b9e5a8ce95"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "ca6c2666963c668f8680941d86515daa79d906a3229e98c2aa1f278fc8f6664a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "baf469a5bb3d3ad264f632a269ed7f2d6e250dc2a6fd418ddcf5e8a0acabd380"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "7bbca64f3cf4e1c948afe738d6d8160feae17420ce64d3b4a0b703201b4ad190"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "0d6367f4815ec0759446f3ec70227ba7d6bb354a96eb339e7f60676e32ab5eb5"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "a384fc1575e4a05e20c9c838580f5b8ae6a0723eb7d9ede7275b603e385a40f8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "e61e6475063077957fb5a0cf370f086ae0a9e10362fe8c9cefb9243e566fc3c0"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "9d4e7299d712d226314b765396682f68577e30f602d13bab9f23cd56e0b54e8f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "11fec01b44cd409855ce7b3f50eecd4195a61e33f2480c2532d83cc84944cd7d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "c32d5dffaadbe97cb69b30bdbe09417faa54ce7453701667b739d0e6806bed1e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "050633936926e2c361ce09a2bf7cfbe8198b92fd76722bd049c6f45174e25874"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "c9f1dc442534cc2b44c598b2aaf8968d72cb6c99ef6fd137863e5f993fc31e02"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "e0e9db6677bf46a9b57e5e555eb5a33f53698647a78297d7879e7a4209add8dd"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "b275cb252658face876a3af3c462930ef8a6e6d8a261f1d6ac0167302e461513"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "5ecf20fe632c77217740c945d2594e9e170e1a2309175eb7670c0cdeecceb487"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "a0e46ed7f1bb3e45bf89ac913a0efa12c3775ecd98c85874c0b0b63d4c4d6848"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "2603f562bd896d1b0316fa7b56706de62f8b1ce4969fcb22d0ab9296ed566e33"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "b88e76a72a62af02ebae8b2bad9dd5ce32a08e75f70a19c13e4726caa5c74a8c"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "e18bffbef328cb7711886bf9e56ba4fe15aee649d910a5da0b7fba8cf43bb690"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "5061973ee2754a03b9a19fe2dc0b5bb6c2bf4db25c144ff5c3a603c828887f2d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "3ff3f299927d6e58b89127462f148d55a43b281a3169143991828ff6120cd8ef"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "a3ccd6995cac483fc874c4199d088e3478b3b94967d45bac56ede90860c26cd4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "5d0fd9aa2621d35d343350b13278d95e95555557c77aa2e987d16648c9aa4855"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "f9a5ecd81f2f0ec0ba230f226f76e2004211d22c3dc1abcabf57c2cedb0748a4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "c1cbf7fb22921de92fdf8f469b457e454840f92b75c02e1fc059fd2160676f71"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "5f4cfe0d20e4b5e4bce5b4f555361c1148ed850dd50fa96d2cdbc3afca357220"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "fbe7295586d783ea70baec7ee632cf0ca305ead0680aa376274597b8a21c6f2c"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "9fb33596acaa78eefa5a868d8d7a9cf90041ba149f5e1432a1a9ad3a4cabd80f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "960ca87c9f64f4b568cffe7d131206d76b1c3bb6d02c921459ef845c46f6a6bd"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "2ebd7ac2b6f9ffd70655327726961381deb251f2f1a3201f771327b6a5bf5ab9"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "a2976e6d9b348dace6778bd558efe7bb6eeb8201966db00d121ad87bb70bc172"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "4a71a8972ea2867ea8f3ae14d51305b1160401b9aa3df843dd21c8bb80db46d8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "288a09041113933103d2f66a69b8f4c813316c330c638f047c747994f33529ee"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "86125638f1b4586de5860b1b204e6792690cb3714582e867e563c773cfcdee68"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "366d8937d403cb617d0683ab01b101a48ceb0c3ca56064f13f9c1b6673c3beb6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "8f1d704fe0920cf9c06d25b763180889a964c4e27743426ff68fa0170cfa651a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "fd86aa07cace6b5de6cf0b204fca9893c4dfbed5f30977a72f9f618138caaf3f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "5565fc0a789f3d76d6126e8722eebcda6bd29c5d6e1bd237ccc888261f9aea56"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "823d3af1d7e6e8a085ab8094bfe491d11bad5dc1757079685c19fb53286564f4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "0d8d6fe68674e66c5148f41c30b8aa50ac19a1a06d28f525ab723f0ca9d7753f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "a61ce0b421eb60cf1f879c48db97a8a588c72ed2378f35eaa1de1cf09504b66f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "ff23196d2f77a1b5f7e1591a94e8fdc9686bbe1d977deeaf8bbaa2a85ab7d796"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "f32fa3e13368aae2277910b13da6e05cb3e16c0b3f6c7a66b3c00b8e0c4694d7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "14c24a78427d6b9132d7cbde51699a4d57520c813c22ac748316c34eb8d6f3f7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "824a424aedf7730d8e733c2d75fe2db1703ee5ce5debde5a4dbd7fcaeb13e871"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "ab06d90451022e4b7ba9bf9b684ef7bba4ccadaf06e51875cf812ff26b2dc0a7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200808, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "77629b353e766b6dec2c63a16ca784b4e1041b00459828514db22f367607b8ed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "4528e9b70b381f32ece9e4b4ae7f9163d85709748f9284043932a499d2a5baf5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "bf36773d1f31058b94efef61fd00d7d38544f62d088c889a8b27166e56c6f64f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "3447397a63e6c6e55aef0812f4bd6bbdd8c9be9f598c8e056faf8fc72d64c392"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "b567639856ebfb10fbff4bdfacea734e246a960463c8820e85b7aff9cc95c564"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "78bccab666a4dfac1cb2f77cddd267f894bb6cd9a3e085afdeb9a052b58440f3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169152, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "671f4abe344f421e1317f0431ff2764b0ad23ed5e888c1fe57c2a456f5311b3c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "25e95c9fbcbe5d91586d868ec65fc2d280e47ff5b7d0014ab4eacc3bbcf03ee4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "8d11f5f0f3d74d3c2e661b28cae25c0fd2a05954f39c194f4dda0e9565f82b3f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "cdc3c3f1533bc9afe40e94aa84e335d048cceefc33b97c28a5a76694ec02900a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200808, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "8d8f5e70c461afb0988e53d585e85192def93610f5b511bfb0c47dfa0f61f9f5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "bd2ea71c0cd30277cd76baa8b7bf195cfe83516275b2597cc5dd1145c01c6ba1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "44baf176f5545d3a5f9a10096e72abba6b05b79a083de0b6b90b1e3dec39197e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "51a30ab5f2fbc81162c604e6735b5642a74e162edb585173b983a220a69c81d4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "d53ac4b0e391a340d203000b2433ab1a0a3bae3f0c1008c46aba95e9ea0878b8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "51b4798907bcfc10f7b13c4ae2efe2587e18fb59cd3cea4360ebf156079d0a83"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169152, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "0134bd1ef0567e3cbab0de0bf937c868ee147d2cc523caba232ebed686bb261e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "a29fb24a25a54dce81d34cbe0b96eb4686218695dcc3f3258ab77ed8131405e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "db077f15bf3f3c73d76910e6dd6434fc69400d76d076fc871c68437966f8a856"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "cb274d9ea15dbd0bf366e46513dc708cc1860d7856c13c4d0937f58530196887"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200808, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "7cc7110f5c2ce16b11b5cfb1829c0a89e52c50b48dd8ceea9eb67ed4086fe167"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "52ced5a54bab8eb3783200e9da83250edb6e2226420712e1cd3ebc77b2e9a26a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "dd1d6113ab76dcde767a4b5e3d4c474c321393376a31c50f3db725859df2f8b9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "3a49d0401d5a61abb8431f76ff0a814a0877cef3b04b77fb5d76a75796d0014b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "fe5c4201fd9635236211497b9ebf80785c41bf334371e19bba199a4ed54246e7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "cc648a2e8a28498a7768e5e452033c626556b01529861e5f10d43c67ca98e2cb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169152, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "45520ef7bde47de8c330c88b665dcdcbe1505230a5fc07df7dd5d0025eec307d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "9d343f8a2f5d7fb7b67a815b7e17367d426f8b2bbdbfb7b97cda65c281fd6fa9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "da4ffaf3d197121fe0cdf9dfcde29a24b8d996df8a512970f50880a7b3319a16"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "4c4ad56aa95f60d0725425942e5561f842a7ad6e91bb4b4197aeee82885382cc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200808, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "befa164962501d3f08eeb6f2c50fdce8af81ff78fcc6c519ecb2812b2e234127"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "817e6e3ed10f026c771d042d5708a7df7e80b013344c71f5825740024bcfd6d9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "faab5893dc72d17ae1c00867c4b5e89f75f3bc35f9333dfe142ccdaf5f9fdee6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "bd7fded0c913cecaa02c9bbbcd3419884d1b7aa704632025a22b36bc2b56dbd7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "1c4afe8d3c5d733cf568d0216876a8f6d02c1d92afca597009e14c78e5e84c0a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "5ba286a703ca4e0f9cfafb5daf66391b1f0f6a2afb86ef08a07e86908541ce30"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169152, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "7550b4f5793706f86bb64b77afe991eb338ae4073e16498a63f6e4b81238b1f4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "b445d2837527b9370b6f7d597e92cc10e415f1fbcd8aa0e394dc5d7f4f3d15fe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "415d2447d14380ea7629207175340a401985b244e8934e5148b4d251861c7c1a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "27a554c116988442f3b5c2d9f83b0d7ded87af34674a95b3763e8aa74c1d05fb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "198ed1a56ec46271b7f74f4be8b46a79955f26b6e334c26be6fab0c703858977"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "3f82f89c3c3f141a0f1e30540cfcabc94a213c6536ddfa7c4cb647b3a4b7869e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "8d0adc12f915b0fc14ead7cc5c759f98f8cb14903dd6ff0864681c100b487e54"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "3b5f1528798b5a2a3b86f0a6a9cec2cdcc14ee5930471c5206d640dd5f27237d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213608, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "b1af9de830d57b88008525d52796371fbbd8f9e7390420bc4ac3dbc9789b2644"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "dfec7522c2b076bd38b14d161bcf1ba383a61a5578fe22a36da250d28c1fe13c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "91f602314910268b3b6ad4d789e18e7a4a8c40ce91ef3c5a1282f7fddc5d8424"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "fe8e91b2fd77685df6758684faea3b5341ef660ac174fe68f10e38e9f43699fa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "5ceb43857b8ea2bb072af2917dce3c6667b2f67f2983881f874a84b5e73d22e8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "556e5514babb68b41e821e7b85cd8e05d4308e7ce61e1030feb0fb0c69dbdd32"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "8e612c045b112b1bd74ddadb67f1b8161bcf066f6ca849f2b20126de71340ad7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "283c62ee64ba494907412894525c53fe929a5151e165d741a4c9a063442189c5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "319e22d5c0e971cc5aa5d07057128017ad762b47501ecf102f877f5d78b6ad1f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "b377e6c6867d128ecb1487919797b306b06bbcf4bc907e7bfcddf8c1079b2e17"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213608, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "39df11179d3c7d9ee15ba900e03daee1bb7dc6929e11a4bc48e8ee0992a5d1e4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "e35ca1450026080034bbedb6eb45684c289bb3040003a14dcaf324fe4e1ef396"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "7b24f2df72e1f826cac3a72adc9ab3d59156b43cb969c2940cf468109ce3044c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "ebc18dbbddc41d27ec78fe92737b3776800bbe7a6f80999023af23d078d623ac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "d152b80022258604ef6958b332e59faaab75df06da80dc564f6e8de1261d27a1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "8233b1098a6895ea2afbf00adfc248d77a5f44f5bd96300bf3863bbb037aae18"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "8a4223960165b1a472ecbdbd7668a90cb0b25f49274360b1fec7daae985cd1bc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "5bc63e5308fe08fb1994df68cf721041eba92ca467a2e9a6f439c3dc750c03b2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "e27fd15f4cf5d8dec6c4a35ad6cef16848ab5c2f84b3301e3ff23b558294f2fa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "f155aa06140a6fa7c820c44b875e8440ef3450b46290e232291c72ac4d098545"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213608, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "700224751556497fef1b3617c9de5ce828a4abfb66cd4f6b6ca14a2a4fa29f7e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "3d2cb44a6ec89fcbbf738f9bf9223f2c20495266e098bc75f109b44865c03493"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "fdcf7da6f1e900b722b501a430ae71e9a979ec1a7ad744c57be4259636b341e1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "bc9c26102de6f44834bb3b33867a28ff2ec04b054bf3da022318c16c3850d937"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "29b580b98ecf0cea96f12fa6f864296a341cb06f08e620577a5167e206d8dd50"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "f8453a6bcdf46545c30b4e156a37de6d15c15b4129cf659c7600a00dcac75e20"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "d1bb49f09e9af2c1e053402458d3a12067f5e755fc8cdfa1793a9cc09615b9e1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "f0ffc51dd0825bbb98dcf7e37c33337e2ce4e7c2c7c28be0143f86ba7d5fe17f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "297420f262929b6132970e7c79ea86f9e1058d3e87573fc65b3873817c0d5f27"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "4c1a98aec32e9e8f20f95afb0a296678847145256214ee6a918f8121a8152917"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213608, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "b4df708e9c6af95994a74a95f467f92c5901beb458edfef7d9eb40d428874237"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "9dbfdfa3b19bc771c2e55e642ee488bda1a29d760bfe934adb9fb0b314f74b84"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "f80cfb4c29411c5dd1b2de19389bca9fed58636d2c7a9c5e571c59b6d1290e97"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "0974830425566a38ea4000e5537c721da6bb83be125b52a0ae7f3077f6ab60df"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "c56074446cbc5162dddc69d52f6584fc13f6f6c5d78741ecdefa4c04dcd61949"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "aa9453b4c7973349b772598a0c0951da809aeff72379592e6e5a952b354693b2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "81999effcaa6e6aa799769cadd3e085bce1359f1e3dd53b3f80582677d7c1f2c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "91bca4aededea90e55fd717cc3e9af2069a450f504fda58901ce8efc1e34b7e6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "c2140a05c977cae261c380ba1d0f8003e4e6dd09bcf6f8d2eb323388d28d9e9f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "a77bf6019aa45d24c1660563d23577e564c42b964ad3c1dfa62c87129579ea04"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "42573892e528518e0097ef936e113b754f0795c074337f139895e1c126a530a1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "2144cda8c2bea4ac6b49802ebc1bd532a76b5120d644e689ac69b0c0c5f7b87c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "4dd5df7da8275391ced741624c1ec200c11d4e1db80aedeb5c861467b097b67a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "f906fbb038ae6ec8b40faa9fb3d1727a60651d40eebe6a87b761d4c8d014639d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "53833e07d8a9a74adc6bd82ea63e5ddd9570b8b74342bb3ce31303d025bf4ed7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "a435a4e40341ebfcb818d370e5401286f0c18c4bd4f19f47ddae1b9d9aafc997"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "56399b0b44fd77509ec28631f3ac2a9493a23ef0fe1fb1b55a6176778ad7dd23"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "cce19c7061002e24c75341fa9c25046d17806480b4c577fa87ca02b964bff6d4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "4e4c5518894c914c47871d7d4b304b35fbc3262d0ea0b7a615883d859236cb01"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "e4090856e8dcfcf116eeeeb55320a5d5c2341d2d88ff134174300cf7f776bddc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "a0486a77dbd7afc20e9857613db198f5b67aaa52f34f28299468e9d75ac59743"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "c6a084cb107cbd5ca292b3d02d878e38eb6a63b78635e7110ac13b4e9660b041"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "bab10554729edb34b243a642845e63424fa3b0e0c3037536945d943d702aba38"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "9df4ff43dfa1da78e0c3554ee3a69fe835e2f83605a3e5d6a600abf732099ace"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "a1ffd3e275fd860da4b471dd94f12b67cba264b6c390b84c631dbfdfbc5a8fe2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "f953f7f2cdac01799cd0cb0efe7c0dd96388fbef7f0071f9f998e45605ea2b7c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "78ba6a7a5d6676e725341069d2930fc8ad22c1996dd32ef69e6f46ea99eb59da"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "4c11433697e2954da90b624f081b320fc191fa6ba260863f8660e8b76b761211"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "56a0bd52d74331f6c9143b04f196da42728a0519b85816b644645d6982827d7e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "95efdebfdd8f9c8d78e57bc3f9116ef2febd79801f16363ea87ea6f8c37cb1cc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "b811665e558ba8d24e6d459a72349be39442fb17501cde58b8cf5a99c8ac6d6b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "5fd5b040c54bebf8d394fd2f2702af0b39395956422a4b9c396f21c569d0e071"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "682cf0783fadc0a853876c4619778dd787b5cd46a929bc3c414c139bf971e795"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "6c3dfede6ae2d70b437fb9f4e32f26476003d4f380228b89f8418e5e6e12b816"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "66b50d634e1a99bb1a852f62b94314f3c3697b597da790bdca7539dc113a54b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "5b1a33a6e17af42c4777fce9929083e6069da647c6e33a6be99845edbbd8406b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "69a75b92e21322008b1c59b9fa4ff9556b4b28906e8d89b4d570a97c4b592c13"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "ccbc58cc36c741553e651514f93e66170d0880b4b49a23aec53b3980a1adf66d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "c097e1b131f58f9f3d77995d623fa448c422656ed26f2ebcfd788bb5ce79dd7c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "5198e4d8b2474d7d071ef2f78b9a58e88eea7a44dad73b3dbc4920bc9c2af88a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "9bdf3c0adf5c25c42fe184398fa109b339d44e5e2764bcdcf96500d3e6d99c27"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "71aeb2c64cf8a3ffe50ee17c764a4f5cdf95726ea2ff8e289cc91683d4d55cc0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "5a43047acfd55f0ee8a050445d873f70c3bf818d86a30118686891f2fe47b670"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "2cdffe933bd68050f05fe53d4e5b39d5832392ec6cd62ec2bdfe0769ac7e6168"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "2057d91ec29a9b9893a0def837bd058ce13aa7f8903b18dfc0c4d3a693fd95d3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "8b3e0e8b2793812dd3a05b604adc1894eae6b5e5e4ec4406a0eaaa9933304e01"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "1ab36b15604b7e31309f9a2371c112810ba44bf2f93812ec42daadddff89a323"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "f9bff451ffb399d1c241a8668113b46e9d1598955ccd3fbb93ac9e94097a58ba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "99a3d3db28eab3b4df2ccb65579172ef234c120eb6e18acfb38d744be500fba2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "c3080af136240c77d49641238a834991ee8fa32d9787a312f1c174fe933bac6d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "2862d19bb224672aa2047fa3adf27f2c4144b934be78cd51d2abc683c9e1d75c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "ab11a6de201ff800efdb09a39344c21259b90a74096184d890648d6a64719643"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "ecd809bc19a789b77213ee0c966283fd558671cc8ff566aad57973f868f486b3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "1d9678be6870bdd8a7f9ed91f7124f8001dee6743465b8fe39ca441abbf1a444"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "0a6e014720466fecf48e0babbf023e0fd95c8ccda72b305cf26d0a590ff7ea40"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "93cb6625b64c2d81ddc4ba092a922db9cd5d518f010c85f687a8c60e10809fb4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "69a20a408dc953fa5a15dad51312d709bc08d8825ea6706f5ccf5c2b8928c57d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "32933babb9b8f4c37f998c47528248ad37978fb0238aeeae91e5f8d3c7dd2784"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "feb1700249f26b23b5abf8875cec591127d9911af108d149e732dffd4b668b04"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "c57817af1c9bd91964abde4ce96c31d930bc674e03f74c9492b07709e11772a6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "7ebfefeb4ca04346fc38d3e82477e110e151b8e2fe8ef59bb9696930fbb4bab5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "dd0c408c9599fb264791d3818a56c054595c9485633a49534167315fa34fbb8b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "471421edab6233d0cc205b7565c446315e42774a41e7bfb722b4ca715d0d1c7d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "04dc9b7c20ed721097594cac96e00d78a84d611b135b032ff823a239e2e8d457"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "59e28ebf81a69f7b9111dcc4522057958e67db7013302c64cc0f33198d6a6e68"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "451fcc4f907b033255c560c628f0804650241df034d265658a95814329ad38d5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "96d5926944e756fefc950b41ec6557e88e512d81dd6c2512707d3d4b089c6915"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "8be299770686f7ae26e3387a225ed47cac71bb58bc042d65c364de08a5ede0e1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "a211f6ae311cba8bcdf9962edd8bee7384be4786effa326925a51c66a13176c2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "274bccae981ad8a3dca525aa5316543417e4bfc3bd811ce96b8c2f89ffae527b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "b11ea66353bd9c45077f01d9c25ed0a6d7e5e0cb50f5885a25057451c7597322"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "f3c3d523fc558df5f3032cd8852d3dec3e900d3d8c89c28067fb7df7261d9d73"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "317f4f0998ffd8aad90222ee12c8cdf6e0bea5f5af584804df5000c79994debf"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "f471e4169d105d191c324ca4cfe35bf8afacb2f5b65402e9e252cf39b81dce90"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228328, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "cea8a8b847fd973a666dc4d4b54f2dbf65eb509f3f1affe6c10a12b9248820a4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228344, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "569f21458a32c41a3f6bf8161f8192421a56f2f699afb0533f8addd45cac68dd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "dc4848f2a0cad311ee314d14c3202e1830e6330cff10fd2840fbe38127f09d6e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 164976, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "bfd5ceaf88785d2dfe5dd244e8dcb7f8917ff6c153a3f77f38bdccaff3f581a7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 197824, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "2f0883c7de99736a164549945ff1338cea80d52affd2cd3068118f67e5f24b0b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 164944, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "79564b4850e055cbd5cae63f4fa254f046c58fcbb374ac445b72c7441ff6934d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 197840, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "18b325b77b28bb80cdf727f96c19e1712c2b565d1adb4564eb73934f96ffd182"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "70acfb57e77d2d456bb880064806a1c370fb6b5d2c99398ed24d64d7e03c6180"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228328, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "b5a251cb52861a0b2fc2f10a62d935f30b12fa128200586170213311857840cf"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228344, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "a28a87327645eefffd5a7ce0366285a51215e243a9f8d8147fa0e717a9f9e90d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "b574b32ce78c227b7b0df70cdf66aac70eccf1c230b8e69483ea0478ca913de9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 164976, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "cd3c57932b4979a9951587f8e3d7e6e004104a6490bd2c96fe663998f91730a4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 197824, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "958a352d9313012ce002c134369d9e9c4c4287fdcd6ee57926a94f4af35b1397"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 164944, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "ea158d8513d5cc5a2820076279b1e6430886bb4a9b29c3d2f13c26ea224c5ce9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 197840, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "864b0cc6b129f7d12355160a95d55add30a0c1f80e3af8836dc14b3bbb370f79"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "98faa67a5b9f79bc05b92364191684162b233f1bc5cf6c8e054f8e01cacfc029"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 183400, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "6d657f08921e6ebb8e89a21126f4a7999c73959d0a24841ace689f84e36a998b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174696, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "6a34f554f2dcd3db7e076b0e2d2341c040eb3a58b0263868d8c3ba76f7fc377e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 180408, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "a0648406b9918f726bb7fe8d2e520dce5217a2e17e338099921dfa4c5dbafa64"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 173624, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "4bf8435efa25d1c8e11697c9eaf8b8d0b016f24515623ca8fd87f583bc5cfb7b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "90ac2a3e7ea33106cbc302c8e486f200a456d880d45b9b589d975d9e51a051ef"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "3b8426eaf459d8fc860218de30d9fc033672865df5a11c79acb0f3c9189a191a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "4f82cdf2548df78d833f4262dc1b8b3578abf0c3148f256f85ac3505d878fe1d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "cd6a97a2251d8f2d26bbf9de06a60f782b3423f1a484a8b271d838604d3068a5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "57deb6c2685897687abe3f71070f266d9bf8e14d91639bb73c57fbf7c333c727"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "b498be6892ccefde5b822612a35f739e0fbc124deac60e79bf779ed1cd52cf6e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "24a477a27fe7fee5dcc8490b6e161ec3969a26a8ce4ff9568e2772b6f7122318"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "e444951e3f113a732e0c2746d69de2bc872e91c3103b86098e32404bc646f3d3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "fd8c4bf4a14ecebaf434ad4f46d6c38bac602d3c7c9036245a57eb1fb993ad15"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "063ff7061f8e19dc95dd9190501e50f076eba57cd50cb386d60ba7fdad9ed8c9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "0c01357de4a6c9876bfd57ecc5d960522a447525f9c73c51250141609aeb8832"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "86ee6cfd28e14858b92bbbd57ef9cffc65a62b99758ee51eb0e1a2cc1a83102d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 150800, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "206462e6c9d24388381692c8d571377dd0c97c56a3d522dd161ccf6c09b0a505"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "da7a04f28dad6cdbe55c54844ffe826bc48a81733276236942c94b9ba82bd96d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 141968, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "a93c00acf26277d21c52e6151ef1e07f642b03e9f77e4ba4bfbe312b86e60bec"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "3445af08f3e076d1be255fcbf673ec59d8f12070f64d5f978365c6250b57f3a5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 183400, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "4f4944bcb2640bf0ab19f316c99a5e6679f1b285eec89569e3bcf6f20ad7a1eb"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174696, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "f41f874f060940bd200c2f7d409d47c9efd2c52deaa1c7fdc2d13ba76c5394aa"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 180408, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "ce7022ca802057ef42f2d9496a05abdbea4c373685ab485bf46a64054eaf0f5c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 173624, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "f66db65a9e46d83b942ed4efd6e428a674dd25bb505c52ca4b654230d7400faf"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "1cee96eb035b9303e3270ff6939948332c536a3e50227b51e03fc822706ceef0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "54558fa52eb51803d8fe7bc37a0eeb17f0cc29e5ea04b25ed14099ec61ce9555"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "8b039f2d151e5e77304ea8aa0eaea80bd14908159d0de26ca7b6b787d55166aa"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "4c5892962411c5177352ae0fbc2c92cd21fc3f92ffcbd3c779d1794e495b060e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "b8ae62ea22baec54f8fd3c2e6a1119b3407d60dfb85a5475d12bda7a4e363b8d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "4df9bb2f36b51e9caecc1bcc7e5d0d99125bf9d34ff6d74dac882734c97951e1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "76f4b9d7e140e05116305e241f9eb28cf2e684f4f8d1fd8a60392b1916f08dbd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "c78a492f5b3f17a18c4a3171b74a1f4deff234fa1d351de2640d318b0870c857"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "d3eb77871ae5011ea0896bacc4fbef60087a577e5fa441a96c878e832b31be41"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "72b5867f52433533f59910a0ae1260ff14653d84bce33105b007e4839dddd758"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "2251129c87c9309f5581d2d810a40576db76f35eecc61c69c7facbb11ef128ed"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "5e3bf554466aaed62d335b8c7937fd6e6e47960d354c8df98dd121221e852b75"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 150800, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "b5120246bee0d430c93f475c1689fb0ff40d290cc436cb54ec08e8a7d65bdbeb"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "acf4da5d7252320d64a27b79033392eaf91e6b7d45fd66c10075445083605ddf"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 141968, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "969a6561b92be307f1d37d746e4e0ee3225a13579e4624068ebe8ae44786d6f9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "7cd63c1c16c1381e30932d1f7a884956abfa22d5733f267e40cc84dca3dec7b2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 183400, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "f46e01582e0a9888571bb75883764e12da2187f991c46be4340ff4ebff12c467"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174696, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "02a8f66e7e2af116a31d89d5f1e3c47febd6b2fb9771a8051cfdd43b168d5bc7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 180408, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "97d6a981f4f222102614b65474a108066a99fe12d975c8652161e8e7851644be"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 173624, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "94475dc0411a7792657f306d00e4b577c9bfc501247e1b2b8208337898cc16e0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "d3c8b753a9d1d02520a6dff1e75a62276e3c07f9e47090d81f8f3ac5ef006fe7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "2d7e57083c95f2ee64cbfa1ede75717faa11f26b593787c3f46e0a354582e4ca"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "67b8982a6e09127560bf8bad56a61f970e4f9a65685ef552ef2c24602f23aa2c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "9df1a32caa52a26f909e1a3e048d76ea40a1370a4d8b7280db072822f67d0879"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "c635860867bc8b0fb5c28492de95b1763481eb9698d50f49fbc8d9071350dc07"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "4e152b7d8954c9623a893d1ec5083c302f1bf1db8648e9cd6df19d22449a9fcc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "3cb5b433e8ac843e63488788c6854b8296e38bc93e106776266d0a8621a675f9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "ee6cabac6cef91e8fea81ac6b40ade16a437a6b7d30783dea7fff671d888527b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "37c87ed822288db09615db5577058ddba5852408fe17b8ec36e3a76f8dab1246"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "67563d404b5b5421dd60268f7fe48abf0c857ff6f62e25414a50f6da04e0fc02"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "a895e2f4b106fd57f6f873b91053d03ad93a5a7a1da5c3f59496b5998bfa1204"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "0e9934ba4224168ff57a3d87a61d7302425e2eeaab3b71740682a7d3f67e88c0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 150800, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "a50cb1be8c27ae2b5ed792a6c6fa7d27094a0518594a8f86ad4178009d43d34e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "da94593a3e9da8cdb1648bff4ba08b183dd2a94f260318b8692ec9be7d9da1a2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 141968, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "3a6825446ada62a879d575b75dc90fc3767f17e7367602f6723a6af57c915841"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "1414e31e8b0633d973508a70f5d70613d3725fa131774a603f5f307f0966eace"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 183400, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "ca1d6cdf7efd4f769dd3fc8b936866df06754ff7e8265f012368bb717f3c537a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174696, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "fac3a2087b5d0f3cd9cbecbbb9fe94a558e6bb0950cb647d1b8c3ab189aa5bc5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 180408, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "c257d6f5539de3e66822ddb062472b5a99b01c9129ec711f5106c2cee6525216"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 173624, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "97cd43ea8d5e5c00ef075a88bfc3c165b430cff7e1881cc93459933e09cdf5fb"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "69f0f7e7661b7628a5c25d80de0869c47b922ee75c069a1bf780841720da7a51"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "591f3c5cc7d675f4c0c6aa02c5f05139cb2e85dde9d5db9e1fa2051aff3835bb"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "9638fdc98e812cc609dbfd437ac4a42ab3b62b0585f7dd7566ffb9519d52bf1a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "e2e8f7b507a487e6aa3e6c01a07f5c1f512551badd7b6c19155fa5a1ec51abb0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "3930acb5890334d15041630c844358c76fa1eb16152e80207c87bfe9a02bac09"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "828782b97f9745c5289b381ae3bb4391468cbd842f543affc093359c86a7f8e4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "a91b03e3f39007812c8ea8dc721e99f30eb390bc240bd4fdde926c1132c8b951"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "9f9d814f6ab3ca317fdc2216a8421521b1d973c067c7a066e8df646d9c4c4cc9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "6f3d6c29deb079f8b3f75a30ebc9194b4d56c6aaa83cc318e6c8801066e47b00"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "96ca71a281d144de11a45e4e4c276c5a4fd0e95778dc2f6f2ee9417407a48b45"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "7322229b6498a99d7a4690043e609de420ff4a9a0f869ed50150a7166a37b1bc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "cbe54c6519cc546a7120ddd79116accc2993b5deb0ef3525c294e77f198c7ac6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 150800, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "026d614ac620e0e27b2d706783b95647526413125cba4d106ae9a9e8f4f7a1d7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "a028959fc5a2e90f5dae448d437d218e76634ce1dcb923369e8500336bef93e8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 141968, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "65b2f9bb28b0671d643901d95ec3156a8c438dc364860cf4c08d246968ba88cc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "bfaa446d1e7303f07e51df75fe75348f18ad960b75d91035a8533624856829b7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "3e93c67f8d8bb58c6fc44b09a92c4b1597ff8d67676daaf5368ea2fdc8d02288"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "6276aed6c72e8f92a0d017eb9670c2a26c634ecd06abd87205b13a6b867acad5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "fe172ce7955b461f33c1067f6753f757084fe2ee59374ab9c5b306161c6a06d3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "4eb3d1f8a21e2cffb69338e6a12cfc7eeff0589b230b8dafa37b221214a6ffcf"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "9e8e745b64562cecbdc7c28b99a45a7976d306763d84131ff69321242151b5da"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "3f0e6e2f17e0c2a07ed58fcfdad14186d2c8f2f23434325b97fac3df05a4bd3a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "4c90769b2b34940a2121bdd9e7209601549e50b7aa60db32e88c44e454c1dec4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "02d44ea5ea28e5c7860a4e01381406002cad1714a53640a820e613aa949c30e7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "42f517c6eac1494387908895f2413479b8a0ca5e64800e97c269f68984a56bf1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "1ccf4ab0a2fc996e93a7a37a29bb5bd97057616827f013b87ba9f1bd8cd7ab51"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191080, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "15af6aa839653a5a0026d0716042eb13c72bccde93cf16b07a725e707a0b3e25"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 178280, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "6d7bd6c2ca43f18879721a9e925bf3e54271aa605c5c49b0ac2c30d2d85a62f9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "00ad5c382d065c13debb99f430a6a6473329d810652f6fb0261a6ff260f5357f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "4be27a50a05deda113de8fd1a4058a0de3029ac1f23391c4221c7ee2ed273aea"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "e00474b2efdd3217b8cdb714e0e4cb2b639e289e46312a89e62f9cb88c369ec7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "e659e22519e49182bf726c635cc0cb0ab2b913b8ae0cd53cc4fca8640aae3dc5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "e1ac036f340c18e81d0814f572dec0a13908ceec24afa888b0e3aae5100d9763"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "ff9d5dd73f9cb1af0ebcbd7ef9c418059a317518e8eada4b90679b19e6151011"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "24712f57bc44e9cdbf26741172ada202cbb907dd4ee302014f9db5f70d31188b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "a75a7be5dcb780f2d21347b5bf980fdaca3073f16b6327644040cf158436aecd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191080, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "32a48cac5866130e1f5bc5a2ff6a106ee2c68c152bb3e731ce2e8d061eab5a5d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 178280, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "bd4a2af3e6a2e0c98dae78aeac8273f15941fc2de4f24dd94421faaa8770fe76"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "0b3bab6de191f736d3b66817c88989c87fd7aee4ea0d82dc320991ceb718de12"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "f8f40f1f15dc74e7ea99c6aff5b85f32e210476bb78e8781f0a22b458f9f0bca"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "f60bde75ec707d2a6454cf8c09b3c1e7499433bc885a9e143196d493e771811b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "866da34d90378b94b62209aaa744c4022e629d7cda3aeeeebdc37cc1ed387694"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "bba2af218a97378ac4cf9a90a5ca391dec6152cda720c0ef3dfe06edd4af3cd8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "34b53b72b9a11d815ab0b829d15a8580396b19989c6f1ee256ce4451f1871480"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "146221a72728bd9241afbf7a91d7317ece87814dd33f632005e4a91b9ef5dc76"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "1cc6363d53c7bd1871dc6e6561fc037bb3da3c43569beebe47d099bff8b32923"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191080, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "7c8d0ae2c7dcadd9bf42700d7353bade3245812c9bea29a0dbd660a57b92493d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 178280, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "f6f67061f5f1e75e18d791511832384b7549996e18dbd0faed0073c07a1bad16"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "c8825dd964e8c26b83fd3664315f17d2f14ba003be5b27f84228d2cf5b9bcf47"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "c3746355542314520af185ad48e3e1836a2a12ff6d6d3ec6de7e6275c9ace7fe"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "7fad54ac62cfb7a88123437564e81a92725731e34be1c284ce94d9b987c7e960"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "16f7c1d8d4d0ec27fe18e15a14e6d73137df1edb9d11ec59c70214722e9a2d11"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "391e9393ef8d87e8758594deac9146041b4be5549193382c49fd677392c9988d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "ebb882847c9d22bb12953abf43d805718499872ad3ef6445c1fca2c3d482f465"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "820b2bf233fd86d4969fb903ef30706d56c24026238d604a21a8cb11621a7b63"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "34dfed88fa283536452625fa48087245a7f7b21230e4033e082f6be7ffd095be"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191080, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "a12e15e58bf4153a4ad497383fca028564820eaf30c40237a7186e776b6c50d6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 178280, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "59e7fc40463d93a9a926670cc432ee3165a4e4f667c9abffbd3765b3ac8100b0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "3a7c6294bbece794f61fb92763e445bce7b4d999aa9a5e1dc835ac31dbd0cf3b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "6a41e6067037b53b8f3d47caf0d698f7e3797f37d203d15db31152be9b56df19"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "eb6c9d9f5e586bfee7fb28d29dd4cc7486cbefec26332183ff35258aa057c899"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "d58b91a4fa53281398af964664f56891dd699ef5a5872f549787fbb016ffc489"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "d42a3b45b4cc0c400cbf65362dc038018cd919485e451a93d6b1b10f4dcef633"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "64de2d3ea2ffa2ce6d44412b1223dda3b31043828defe89436369c16e18c3e50"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "a6ccd3b53a387dd4f329d0e9dd41af5298b3975a697279939f1642bff6f17c8f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "9daf6f5098b1fae09b6bf186f699ce20dcddc1a48dd0e6e2d5ac0d066a1bce05"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "87f79a0662e251079ae7fc50de64eb35d2ad18f2afa117071652b77df83438bc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "6a29f4dd303638833f57d15811c0374f2a06ba5f8a5d3278351ae3fb2ddbe559"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "350657d65876b4b36fa742ac995742820aa208f146488f18d6153473efb2925d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "09f15d7bb78563f195b672bd921f57bfbdf130e153da4656402bcf3ab076b044"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "ce23fc0029a27106966951767bb492ad820a49e7535bab834bc40020669ee6d8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "c0dd60ec23504d5334c7df3976e72e78032e8b038c1848a76f038020b85b8cca"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "2dda71631e5ac2a2168129e835fc7ee364969ffe0a91486c2a71939623010944"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "7448b5b0b04df459ec98e3782dbe90af2ce8f8c52a76cd02e4e0878b01ad8613"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "9d0afd5064ac3e485060fc2e91d3d7aeac78935136043c882964fcd85a0e6a9f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "ad120fbb30a8d4f231d20d692acd4225051b099ad2c4e0cf0fab5c0c38485c0d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "c476333865d86b5f2cb5342551f32d1b196f92c2bc6a2e4cca6f4f4898fa291b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "033ae6b04e0628a2061cdd19f7f006cbbed5e1673af75a15e0a2b00cf78a74c0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "cd68837941a7c519e2efb46f17b6550878672fc5f63b48ad54d3755d5e2c92c3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "b9454be6be231dadc66264267eaed2ed8513e166f1c25ea54aee9cafa9055825"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "0b72acd9868008f89e34759ad75c300367e4f98b7f1662287e4cc68f90f4a6d4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "d78b0f5e6d81ae28b9c8d2f2200d66d3fa5bbad8d9586acb8c167250ea2b5d0e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "8c45980cdc390e1f11b3abc3fcbe99771a879cc3241a967e38e4c0e736b755c6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "40a345a3d2fef9f4c99d47b3aef498191e5ec189c275f22d5ee39d7493b7a59e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "7cc495d1b71215865a5b9ad98861c8126b2c937eceb7e8ecc1581fbc654b38c6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "67ee462d50e0833695666fec2fb74af8d469b679eaa1e4821b4a6ed0f7c141b9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 199864, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "7976b8d2a5113d3eae417e56f04058f5c7ee6486d71f2588a5d80a331c532fbe"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 199880, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "500c75293845ee57ffaccde378c2ee28f7e20f61800ae7a7a99e67261acc9884"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "9e01ef4b5f4732ec768ffcd1bd666c7db71015ea0e5edf24b7f7dd4b7caebb6e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "58509aeeb02a91014e6092f5770ba9711b0e196a0e7eb9a5decd124a756974fa"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "a3258142ddb13c292035bd7926c01b4b8a09abe68819c1341f42fcbbfb6777bc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "82d64adbd4db329fd19f9d7e4b4de93b5d22e407ad4cdaf0fe5e7f88be4dda4e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "c6aa038304bee3393446f9e21f9b6fa45d06355bb6306c91637573189df83f3e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "ee1f7b01c7c37226575dcbe3dbe2fb3433fe1833c5797d6e6976833357262083"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 199864, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "67237d62b48b16c59e747cbcf9deee07f468d0b3652c8308f03d57be2cd0bd8a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 199880, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "73d5c6e4569557450e66198da6043cefedd07e5d56b82c8e19d519bae930f7f0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "f8134d6b5a5912f4aaa402f653682e17f396b908a124f468f0e382fce43231e1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "9f01690893351543ef3ddc48dd558671108715b272e14483ed5bbfda08aa2696"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "01b7ad3bf491ba50cba33406bd25ff6b7167d7c7b18f5dc55f82dd958024229e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "fb4599f1535f2661f4c43e8c6ee46566e4a8d5479643693ce108a4e468897239"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "8d65935c4341fe672aab6fd3e6ed3078f3da240bd03cae8ec6d7c38f3808b807"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "40a9738f121605f7b1fb4d6a92f27e4090cd02f15987703a10730fb79df2a6f0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196792, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "296148ab5b5866299f213e6d97901c83b4b2a871e75475282e68dde7a410b3d1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190136, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "5d0739573b22f76ca3c44bde5ae76c5b2b28f62730f48ce9b171a0317ab7f70c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 195848, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "3948e8a0560fd2f9106d754eba4212b9ee54b5d5cf221e2a9336916fe31f52dc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 190088, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "5bd78820236a7b1ea2d63ac0f09327578c30f76824852d46a4c03e7dab5d6c9a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "76be5ebd39570aa942ca03ee2d2acac62a93508fa3f30d0668fd9d495794da47"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "b6716b0fe67f715e04668d4cf97747a18429deb81cd622fd3c6f9e1ec9e87d73"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "216178155d3498488c52faf691745f8e8dc22a71f0416efcab4506d3b0ce144d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "e15d65be3c91d89edeb0bcf6e0c88d3b4e5ef04f609600fbe1faf1dbd78dab8f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "af2422e52d59c3660736aa53594c42f4ca66836d1e8dc5b51135d75e30e57a4c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "a0cbc66ded5dfe5cef0b802ed4e7f8dd9b171a25b6e7f1ca487e7c6563fead39"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "97fcddb0d4e68a13efa11e39bfd4540330f09d0c8c006baeb7306b196d176e18"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "b2458d8873b50d19466e627084bc217981cca78878b7d3a8413704d75357259e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "ae61a0e545ccdf7ae47b0beeb96710bca8fba16a61ef1de4a3a8bc7e30f997be"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "1cdc33576336f9074c51d5994d5f31c5df097e54fdad84675cca711cad89023a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "abfde989c16d0085b27a3526a88e94f3f5c86a27b06afdad75b7c81adb95317b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "e2c3ae9fd09f9f0e25187c5e348962ee078778883d134c7fdb9ec8524537be1e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163168, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "bbb4d83e12a555f4b4470f58a19a1deb30c67b3803078fedfe042e410e9b7957"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "f81bc5c8ad7cf05e30a0e036ce74dc2a6e12c5d009bc4d2837e8289e5f6a6686"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156384, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "869ccafd7a4f4cbc956e858e0de558ecf6475b8248c5cf627b412ee381ddaadb"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "cc5225dc8b26e375c98925e2bab321533588ab0056ffa604d42172f50a065550"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196792, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "74586e96a37c284cb7f73f3892ae766f66542101834f48a6fd7c3cba62477f90"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190136, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "59e6e0f407976ae7683b2dd52dfc33c86e5c62d7c659bee85b0eb7e1533fae15"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 195848, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "6be38d4d0aad8638e2f656d36e5c2576135a4c06fd29199b716760d0d0b3a790"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 190088, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "93c0edccc8562d3a3e93a0a7d9d5a9715a80a9531b845b7fceaef6b36a219e73"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "1195a170a7680051b99672fd0abb497435e82f613c96848c5b346a9234656bc3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "8468fe2f401240cc36ca74d3bdd4286bf684b9017ca1ee6f0883b2b3a7a65ae5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "9781e90dcaa081a30709559c54dab80ed5139f3c2f93f7fa889d31213544564c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "b3618c9a69736d0f2be96411f73e98d772de4ef1d5a6da6d16efdb3f6ce529c2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "8ec419a715e035110f206ceff233fdda9b47499246ff175e0b06101343b7f1e0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "036ebd338ba1ef1e0e2d0109c6f2ff13539b6688f6c379cc70b955ae7d036423"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "05fa1378a653d8094b850993fe4efcbcbb26bee693f54a748600e2123d9e3804"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "f8111f204ab729e9798b1ff23db8856c1dcac1b8b968e9201d7d6884453d1fac"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "570a1fac5c9abc2ef1c49d234e9ce36b988b58b7c1ab899a96309d498a10ed5f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "bb7d66f11d8edb2585a3e137361dc03b322290adaa3f49fda35c7ed29910b85e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "8d5f652b273e2efb56017bb04b35c401e000a1c3dce5a48195062b91da7e38ae"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "ef7d66098ad8543b2dd4f539e1048d7fe49bc00bbc0f3a5f5f32e36ccc41a100"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163168, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "1478da3421c96e1e81e700db1cec9d3d3c209e468df0cdd9d44a369aa37dd313"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "34d8f56ecc2b9ccf81bde7d952fd9bda43cfe3aba44e7ca4de39150b5d55c550"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156384, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "8ff1474bfde77bda306ded0f97e531eadf6dfd4d460efe32e42a4d6c77c0739b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "633bc3b950167ad06d101334c79f8c85b6f9533b741f4930591160a1b04939da"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196792, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "5819c123c341becccf9a26d2a53d38de50fcf0c3b435669e992a2527e4ba2328"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190136, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "2e2685b637cbebf045b44f2e5c35b2a571297e543c7b5946ad1d52742e29d07f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 195848, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "0b865ae82e2963cbc41db05857151575823ace22e07aabdeb35cfc200e5fa8b3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 190088, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "2c0586fd37777e96fca5594199277b0bd10c0e95d95bdcf802750c2a463140d0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "61225cd6aa9ef2e2b13ebf0eb06c148f32a5b30b17534053964c9ce5958ea341"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "cfa1f11556f573edf95c03e19320831e17b8b7aab8049a2921261eba461f70ca"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "2b63c8584ef3ae2cdfb9b3ddfa1edffcc98f312a31ac37acd5bfe3434b986ef7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "63caadf6f6884e7acaadc529745a75635125384eca6682c3e46d616c4850f1d1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "0a499caa73a2c89760f9fca95ccd196ee29a77d9d1e36404239c1e5a8c466ca1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "013a6813ffa3f8cb3446627f9c5f6a231c5b2203e75fb692cf36dc48905fade7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "bf018448dab979c7d791f557a23bcb2af6ac1ac13aa958f532a3a9ccf78f7f02"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "ee375d9153904010277bb1809e6666bc6709b69b708df525b01fb657dbdb0fa4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "7eacda8d37446295dedfcc2c8a8dc781bcd844fbacf1306c9243025098cfb256"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "a74369914749045f75c6d4dbe0b3ff75fdba7745ab4b62884c89a05b1c202798"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "054ea858a2329ea657fd0078edc5c970dd34dfd041cb7f0637cde4da7a998859"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "79dbb9f24b40562f88f8ab5c251273ee3926e3b272f6f42adc8c7f98ea75a682"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163168, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "3f3199cefcbd4f943e15e6798201b3f2dd7227e5f2ada77b204d41f18a571b7e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "f454937b3bef6397f61af1464d5fce575af0d14afe1c3be4eeabcc1511ecba00"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156384, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "5904a02167b9a4eb24be38fc87bc5ec8734524086b46d3def32c658fd8e178c6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "0472b478ce845df4b7475aad1f79aede6aa015bc92b953e8bccd20a2f1f402ba"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196792, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "adcc96cc0758331a6acda4e45798981cbf3ea1cdce41265da2c2a14a5b83dc18"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190136, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "d5c52bdfa05413a69016a67d8e73905fa6dfc8145f32a2113bd0b7c7633fb507"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 195848, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "42e3d5a1c76b730a51bdcd3b76cf5656ee9f55c976dbac92399a1c5186a8cdcf"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 190088, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "f224883d169873992d33f5ae342634b6529be802d662fe3d8d57122d71fc830c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "bd71d0954c51b3c380ec803f4ae9424baa1296da145b433a4c359ef1628f9211"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "95d86aff23051e0d157d9f4866c8e269f9aff8abbe967f4322bc7fc7df4d1ba4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "eb7d77b1676ceda2bbd47d87059c30cac5931d3bd1398b1eaac0175003f1c067"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "7101a65f3c53b3ea9d73df37d1e918ce158acdaf85723c6096a94624e77103e3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "739fff7d9fc16cadc4e8f614acaa1ae7c455d39e5a96bbd30c22174a427796f7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "ef4d1826756665eb947780d3632f134335218c04da18e56b457d7aaddd942b7d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "b74ced39b7e5f62a4f21cf88a0e24c753eecdf96fe3857ecfda6d97929239bdd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "a3ff6bd415062b19204478a768df29f35479a8d10a7e99f042a3f2569cf67b46"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "0c2539184211c05c70b09cf5f1bf21f12fcd25bea4d19468feba6f05adce421c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "c831cb2be05a1e5bbdcf2d367468f9fc133b8140e863cf5474d2ebd2c51e8259"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "411ad6b73b0606583df56cd5009a7df3c5dc2c4262efbd4c7c12e59c89f7fc21"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "067d318d4d14fc778d2cada51f4e44b176ef69457e435cca90a9dcd55a20bf0d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163168, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "e3364eb9aa1bc60948275a362c29fa61ca5389732f126c5e9ea2c90294988d50"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "f6bf55d6181b2762292790158dfc4ced57acb470b907a6956c1a8cbe79623cd5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156384, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "eb862fb4ba0b64e10061be144a6c22d95a5aa916145500755cc7e110dc7437e1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "c4c4777e539f778e11a81f2bb910fd5c5cc47e955937ae0fe491f6ed83d51559"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196976, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "c55e192f33a040dde40621eb85d8e4cc6e672a0eb046cb2a005aecdbf9395729"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext", 196880, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "e483e3103e29beeec9e19151b875c38228c6c90a12b282dcf17f6992f1a272e5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196976, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "93674a86e7d8ee0a2f9f40e602e0ad71ef2c84d5c3a87556b0f2f2a20e6573f9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext", 196880, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "37eb9253eb4f9b95fe85197aedd2cf5d1ce9f563b1c94e6d6afc4331baddd203"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "56b4fa5d9e97ac987d6235859b2d4d6420da45b1e62dec94626ce56c499ce696"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197744, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "b928196bc7b0ba14441316ce3b725b1f9505c34044fee6fe7ab5871dcd7c1cdf"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "697b270684e11f4e1b329a151f8267cfbc330b7d231ff9094f9de0a4bd5a5b00"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197744, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "d948eed7c6a55ffd2993a8f12e9101d1a3b7ae5ffb3a0ee691f40680aa8d8a27"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "0ba45faf931966a8510f581a14d576efe773f6a8d9f419036d09e04776b3aeed"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197744, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "a320a3fbbed935b7bd4a7b5e2f23031271fd0be8a2af4f7a499d6f213a939a0a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "f1605fe856b91053d81f9ddb9dcbd369aa85008c743266d58683a98e72b64593"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197744, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "aa132161c2a9cbc3e56341d64b98dd8ee32724b6f22fd790f59ae2f2625d66e6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 196976, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "d5b468a9f6b4f89ba4012ffbf1f78c0a7fc1ee713c5db9c7764f629afcfc8cf8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 196880, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "d6088a05205fbcad7eff39e8e8c6275f377f65c4ca1eaef34040d19601afb821"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 196976, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "248922e3c764c546529e6bc927f24d5cfb6da3e8d5b7b41a1bc69e38b16c6b77"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 196880, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "419043261765b3e95358a8c345680b690cca33649c04d4f66e58f3b2be2caef0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 212072, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "aebfa6a8ce2b8ab6b74f2f0e378469002d3d4783021379a1bd2a33eb79f915fa"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 224440, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "94c29a06fafe2de02a41c593aaf0e508097bb2c6530d8b6f07a9343e6fb6c9a6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "96fc2779e6eb33bfd3bf3260b8498d17464eab6c85e485976342f8ca29efa39b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189032, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "3d3caa0c0a2deceeb13669e3866712ca51e0d64246ec75e31e461c637a617ac2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 203448, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "4569a9d1f48bb4f32a19071c56b8da2344f9528ea24cb8226d6243135b36131e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "329cda09630232f7fa899a536991910f26a94841072e3a3d4f0b4f674a29c8ee"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "92d53cceab1ec815a79b2d441c0da4e829df126d9abfb9f0724da765f6f6c447"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "d748ba66e3c06f8b00c8d5633489237353005ee661be15fa07bfb6fcad40765a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "1bcd3df9695277cde346ad775d88d8b9b1a5fa5e175cc2d2dfac0e65be1af22a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "2eb0a60324c062d7c2306ba20a12832ce3f39737e0f0dd2924deeaf866534075"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "0b6efe557228d3cc405f1b3745a3505e38acc0e2a604edc010d22d54b4bfec88"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "82ac974cccaa839dfd5b9459463f59eb954884dead4c9fbc084f4c044272361b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "ea717fa8a13a66476a0d56f3b57563fcea07574a1eb71783893a7f076a571e44"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "d9ed565300cd32b864dee97c955446b3560d37e38d788419a6e4b7b491e574d6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "b3af3c274d13aa85ffd0af0a58e8d9c2da80bfa1291980d59209b5bbc276dc40"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "199bc06c75104fee5daf98f37133443cef86409ac7448e9a44ee47d32b9c39e3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "546b0b6a2f273052b92520b99154b7a769bbb3ab75c5d74b8b6d9744f2ba3213"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "7e84aca502a7ba88a6d2f60978e9d8bfd95f91e00fb1c26c0d2c08aec9e10f0d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "e744be6fa3455bde73b16a4a5f39eec0f6d7257caf19c70b0119e4c7d5a8b45f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "5ed4552fbbb774c019247e15e52220a72620bc46cd1acf1aa96520756d0b4f6c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 212072, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "caa5893d0a4d6fa461b8762f23ad33e910031ab7290fdcfa763391b2d8ca3596"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 224440, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "80614d6f1be3a2738d725c52193683d21b1d594faebdc1c4af47528ff7f15b16"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "5d5033d8c291cdd27b7232aaba491db5f4dfaeb26ede3c5a876c35d4b070cc32"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189032, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "07682479019b3df01a9da5de3ab1859c3498ca72244f630fa70497d0c1448190"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 203448, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "568da91d00b6ca5295c7ac35ce8642a5f15773384f4588728aa1bf35900c4374"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "62834f9fcfaf3f3c9f7acb7ad5c341b9c7fba420bc52e5422529d262473c9a71"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "7f8079c1dffbe5fba262de36b9055ff9138839b52c8c10b0ed553980b7b52d2e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "6f746da0cb71b6d466a181b0e4ca8c8baf8a845c8ba808e8119cb813880cb891"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "35fb6e99fca75d9f28dc34bc0afa856c8843af6a4a3c5b987e04828362620428"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "6acbf54adeaa73c3d3bba58df3a4201c0e857dce39e7d91b87e66a1547dab323"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "e81724be723d70234316bae094b478ae51de0324c763e00a94e200923f7fa348"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "551ef945c901f5ad41c2a7ed95afd69a40d6a69546c3a0674e1d72785c918449"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "d05d08ab2670f26bcf983902b93df45ee1ca8fc7418abd64d4db11df1cde515f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "c731c3b2e9aa9727ed65acaebbd5be9053a59e735f19c98c3523d7d16a6dc5d4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "59103b063cb33255ee31b26af8dc3136ebc203ca0cd9f3717ed3cc5b652857c9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "c4b6fdc333e7ebbc0c20f000acf5d6738c4d8e1ae7986c0ed7b4e07655c73683"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "632e7d7a376b036e6aa32fe35887359ad0b5ea7f97406846230af3f2b533382c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "011b836744d6857115caf65369f519ea85d5b1b74b40533508779153041de919"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "2605c58643083642d31d5e46e88010a7ad43af503cf8004567692003f8534a62"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "390e3bf974601478064eb691b011106d1f2e3853ba85bdefa98a9de5bb71ef43"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 198728, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "e0335a137823ecc00e652c5cf42d4571bf97626b53b544715712b02cb19ba23f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 185032, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "78f497d18c03a505baf63f9f3cdf3bab33d575b1c7a6c80bf0aab73dc7d686c3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "6891e41f200988c52d932efb9e55edf8575c0c9339dc2927066ed328c5895533"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "e7752990fcc50952e94bd9123a07d631fac5db260842e4c9c9659bddc78afbc2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 169120, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "d85e44cd55cf58589d1c74adeacc658cb6c625117f1f3c6c18d951197835d0ef"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "01c97a9ab2dba15fb88737331dd0ddd705707c0fc9b75d725934e68fe4d58416"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 153376, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "f3704b91d729b9b04cdee7e94cc6964c46029539428988a4b95d3c2895533a94"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "57f32bc7d530403f5c2770f23c5c8e107647d4681ade566d7d2400840512e80f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 211560, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "7586eb66a244fc553c32188c55b58576d2dcac4fc87d1ae9030c5b8a2aa87617"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 223928, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "9447966c695b994959eddfec8e21bc33d19908820233d00eb390cdfe826d4dcf"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 223896, 384, 2, 32, 0, 3, 64, 0, 3, true, false, true, false, false, "7f87932d8ce6d96151eff1b4bde3c253bdf3c28f2524c9e018498906a7b812dc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "402e99537150d17aed9a3f7b4e9a46bac620dce0e1ec767cfc10a1dc78aa50ca"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188520, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "d03a793692c2aa3ebb89e32d96746a5a8dec99e80b168b26b3c2029e66f64009"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 202936, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "53b60a71cd0a015ef2c070d701650b5d9ee413e44b9e1451ce3ab00b3e636388"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 223880, 384, 2, 32, 0, 3, 64, 0, 2, true, false, true, false, false, "4c8f7eb0e4f7a8d78b8c2bf6a80992058b1d0189295e135e084d7e3c6a4fb693"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "263532a37366b3d4ec599d61e445bbc93564ac485464059e67c099a3ddb23ca2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "1f58bb595ab8796aa03ce49074236b98f4aaea8c1f416314bcb94f3397ef4c5c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "8f9375c7f2687c204ddc4bd99c18410bc20e3b5b7e118d96f072e575a1ce404d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "ea133064ed1e9c1dfd06c03f2525dc087a9165e721eaa18af2af104f59f19e9e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "c6ce9e89fa4e252638e6d6baab5279d6f29223f7a9cc864525d4416ffe39251b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "eec494c7eb91ca983eb82498918858722f60be571b19348ab6ada28a1b75306d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "b1eb921030e2f452ea80229bff8e3a5f9e0d7595dab82ce6fed68d46958903e4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "5771ae8794750c84157c9dbba83391062773b49b40fcf07b321d731862b753b9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "2e297eec64081cd79edd3555375eeedfc616c6378dc3c409aaa5987ce9db4183"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 223960, 384, 2, 32, 0, 3, 64, 1, 0, true, false, true, false, false, "58840ab9a8c362751ee375f6f4f24f6df350f12a0c422df6ecde2fe7b8e04541"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "f525837a9bf1ba16d6dd9295d9ed3e004a89acc0e58d356daa367d36094423d5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen", 223864, 384, 2, 32, 0, 3, 64, 0, 0, true, false, true, false, false, "637280c6a8ac48430492948b70826f578500aee0d63f7388d8b63427f0e4fbd5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "25ffc1cbf8af6463e0d13dcb036d7796388f68547b38687ee1c023ac46be0c58"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "6335a17b031a70eb410d780e302f6b9f028ca712e8018a2692eecffbfff3709c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "2517aa036b11b8ed06af0a17b0e472c25b176a302bcab20cdea3c4516946c48a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "8b1151d1b7384178934bda185fc9782a3db4d5c030c3f746e33bfda8073c0189"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "656afdf3f6afdac93624edaaff16bfa4a00619f038772e67845547e20a3a765c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 211560, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "6e2751df88cfc2c58ff7bd198198b57f1691b78a0b5f2f4d0672a478e269c41e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 223928, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "e47fcb3fac1ea1a06df1eb3916fd4114d8b47de1cb92970619d74d55bb626893"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 223896, 384, 2, 64, 0, 3, 64, 0, 3, true, false, true, false, false, "61ab2360e72140f9171d8edcd6608a4b563b2d1fcaaadaafa1e69588a9929bdd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "c2c33ea44f571ba003713eac672181f85e059485b0abfa008362d6d6259fc3b5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188520, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "71e9b458b0d46c9f985be60e2f51834d58a6fefc713880763a5f2938bfc9012a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 202936, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "447b41d6da34c8cd6986208b1ef8522477c179d4344bb6df3c7091975982b820"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 223880, 384, 2, 64, 0, 3, 64, 0, 2, true, false, true, false, false, "4c2e7430d32ffd28ae7005aa99e959372124e66475255e8f5756181ed51d725d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "a89df3c291312daa2b109d6b8f41af1d6f0cb709136360c45efdd45aa4fc7b34"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "7dabc2917ac14e6ce00a5786949b94bc9afb1729031ff55fde58b72579397da1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "5769a66a9d29a6730ce4264d2ed5c70daea1acbc71d4acb0b4beb27722051165"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "fb022018f8a2feb71a99dbb5e810cf11232e1a6d1572f5e829dae3e89762c04c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "3ffca3800a08e1900b7bec06393d2ea6140233f90412a2928821b9832ef4015e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "6ad474f9fd8f79d0aa11a7815c7eec81e1b4a0989e5e7c46a8ccb9625b27bc6b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "5547a5d25911293a92c93889a3315f21593a83bd071819f9d55ee50cf30e9527"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "97d19edd82b522d04486bfcba7c7214b66d3316cf9f9a82d43566ece1869405d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "8f0e067f3bcd438e9ff2b894932423a4c142a8555827275f55744691a1861f30"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 223960, 384, 2, 64, 0, 3, 64, 1, 0, true, false, true, false, false, "b7dc684bb536f07a36227fa2f587793c3868ebe398e7928299c896161eda36d7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "64941b1d9188aa5651d84feb40dfa5514cf9d70aac5ff3136cae8105f5217abd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen", 223864, 384, 2, 64, 0, 3, 64, 0, 0, true, false, true, false, false, "cc8bffb65ceba527d971e13be427079e91c098590a199a61c031b075d3d64540"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "34397016e50982f6eaee3a2561c460873f87740a9b9edbbcbaa8da45754254a4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "e73e69c82d4cac054a67e6a669dc8f709d208275a14c8fb0e02b8ff087e5629e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "bbceeb0adb29950ee718841333cc9081653874a2cc81a2da4f6a8150c6344648"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "c976b9aa7764768af2af71558e17a01b62b5b95e28500da02989238b1fd25702"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "26f052b135a0e4e3fca09a7cdd0d6fcee1f6273586889a8e35d57dae078613c4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 198216, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "c5aed684e99cfd930eff02d216a89b9933d4e34f967f94349ffa2b6deca8aeec"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 229272, 512, 2, 1, 0, 3, 64, 0, 3, true, false, true, true, false, "152f683b23253a8bfcfeaebc62eeaa123b7c7c7409a172cc439722c827053e06"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 184520, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "6f97a9b5cf066d3a51bd5c016d8658c120dc69879697346c51b913cc516a0400"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 229256, 512, 2, 1, 0, 3, 64, 0, 2, true, false, true, true, false, "f1a924732229f236d0723fea1e7bc5a47a562e8851e19063d0c8a100cf2a4e8d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "f5812e45c858bb7a90678e566973ed5e2c16370965efbd87c92dd93fa6b47bd8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "dc8f6967842d4a8a64226a200cd9167992469690709bf32b2f6c9279d25451ee"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 169120, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "7cd53ab59cb00884e74ceb09638de2e90c488351d59e4989d052f574c9f0a591"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "37124c9ef479db449177b55f0e3e31fe683cd5d85d9b3c14dcd48fac5633de7c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 229336, 512, 2, 1, 0, 3, 64, 1, 0, true, false, true, true, false, "6a350c56a342c26f0394f8d09729c1940befd09f35bc70ffa9913d8f1aeb5789"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen", 229240, 512, 2, 1, 0, 3, 64, 0, 0, true, false, true, true, false, "c37be881896d31343d6b50001f45140645b9771111198ebf1187ebd1efeab508"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 153376, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "69576b684650644a7297da0332d68b2d2fad7dc8caa9031cc9a5952741fc5b3a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "b657e6e951e10d7fb77a6ccf8b4b8d512aa0454e0b412068f0a54d388138aa4c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 211304, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "a4e42269cf90d52889a344d79924f5744209566f0eca779eddba80550cb62de3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 223672, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "782696e210d231501819a1d8f9f337161c00305de05b043ece9a0bb969a4261b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "fd77b7b70daabaaec7bc28b6fa3dd744234971b03747b50ff441d0392648d132"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188264, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "7376b95430448e7f76b20ab253828b5c07efd4b6392233c418cfce6db5a78602"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 202680, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "977bb678b0fea7e4995c14be827e50da94253f868c2f25f720f0e8761405f244"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "90967f0d394c727d6f945943257392be0069d6565677bec655eb6adbe8b69068"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "c4edcd10aed95054a4a82564db7fc74a66b4e42711d300286d7983a32d85a8d9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "5fbef1d002fed67f7a332e0dd0b6013c672bfed318a9034b6bee51318f7d3b4a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "2c8a0be877df9fd06a7c915904ce89216761209df03859e7246907c34ae86ea3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "a5a366f7da24e994f5d7b69766dcf6cfd21788ba5ec965e530ceb757feabe764"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "cf7649dde46a9b0fc6373a2b78ad77920a41e0dd6606624c0cca2b47370e9d1b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "69249892d096a47167617f2d83d6d604e5ba6ea41505632f24e8b5c46efb42a9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "7d5d59e82ed06d121e0273706fcc122534d3167dec9ef675986e398e9b27b533"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "91a79868e102855eb4545bcaeba02263bab57182fe85fb34e142e40cb40cd4bc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "75e7d367d3882cd65cdafcd010ad19129264093213b7381239e4fbb8f55284f3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "818c73dc1b25ef722228875204daad4a50452a71039f4bc272de02705d08cdd9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "34ab48608f3343746ec8e7043f1bb1870ab7a6adc90be7a21c028eac68a775ed"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "a6e47c37ed118173bafe6d5f56d9dcf0e8f8fe8b4880ceb6b83399ab106470f5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "0ed45902d82e55efdb2c024ce0334b4368a20dae7a21db389ed6f09f4b10c229"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "b20e3d0ef467b711fb5a5c776f6ff9b3286c21218482829aa63d95f2eb26ba39"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 211304, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "4e39289c87c6583d234e0aff854166b7864bad281fb2a51245cfdd47ac7c1837"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 223672, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "e2f1f8348864f3b87b894bbc01759a0f67ee20d395ffd872ab57e49803d43128"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "397478c84266b4d6385eca82b5e4fdf23166ab30964dbd35e7ac0f621ac90f9e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188264, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "992afc075ecca95acd6fda518986fadfe7214b66e35e883cdb6447c56ee2ef45"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 202680, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "fc4e22135d2497d8eac15be4e3627a6e093ff2560e1bf1d684cef47c47f74349"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "30448cf331dfea7c25b1a57a8a3164b89ca81c1c60bddcec880b60fea5866d8b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "ee5c3b530cbb3e9570862a54abdf6b30ccb0ea8d788165c9a9d05bea50fc51e7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "9e23661628491c20ba5833c01c17ae360d3d53be2fa2c74e93512d52a2235ddf"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "a682d750f75b9d33d259cc700a4956238371eb92e9e8c2aec4aec57b517005a7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "384c616a0ef4f986298a68722569dd8cc8d8fcfcf68a9719144e49182f593821"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "844e3f82f58222b9da1186502d42ee92dafffe315a0632de39ba893a76816f48"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 178272, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "caca2e1a9d60ebdddd76ce1b6c3fb848372249861a92e99dfc0b45193a33144e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "0a5aad3b8c2db385fb4c5958f9ab7c6ced959ebfb186e4e6d5a042088a59cd5f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 190640, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "44abee43e03f80e8dad28fed7a52cbe0df08490b3dd9050c594147d23e0ab22a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "681b06cac3764c312bf9de617c8ddb2b634d0da3c5c2559bd3c5b3210fe5018a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "8f420f7c7b4efac3523a917e08d39784f1e390d095d27a03a118b38372b65e71"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "e9a7bb04c8e7f7e2eaa007486dac4223eef828d80e543081233f084a1d9dd324"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155232, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "e15727b1b5104e59b631f5fe4252081840f0341411542c5bb7f7f954d5136d0e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "3f711447c0e1a7cd24bf842303ebd365df1cd81dd3dfc00c96cafc52efe45db3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 169648, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "103d02caec74d34e8f64a9f8548eb8f50ba3160f0fbaa3a99598b6a8d1169265"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197960, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "5157f7ef4b39847f556a6242da6dfc79377fae1f97b574132e557512bad189c4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 184264, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "4c44fa81cc7ad4fa759d759eeb46ec8d88776da02a0a2d227f797554ac962aa9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "b9d56f9819cb1e93ca9736034952bfcc54564c620c1105483a7eb6cdd6a75b05"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "c27593ffa6108b5231d4072121a6ccbb0932e01ae03ba597a4499e24c04eba08"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 169120, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "63769f1d993d1d9daedc96bc28a313804d0ebead90cd4585e835f7017bb692a9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "adc2b0156c6dbb564b2ab00d3346f52dd336eb11e0923aebdbef3cdeb136de6c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 153376, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "aae599319b224b56ff20f08b841ac447aca4a22744ee4eda8312308c019a53e7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "07c5344d5a841ffa820547dd40d4d9e0d6d388759c636d2f5fc1073272545802"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "cb5f778bbf08d6b86f6cbdfe53e38d66a3452fc23f460b1381bd075abfeac853"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "93eb8679d797bd8c657369e90f40cebdcc45755b55a1cb23be7f14e6a3babff7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "74d6c72594f1ea8d1545349aac1783c4b735cc178e758bfa95d2fcd4eaafca86"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "e3e0f470b1156b94ea05d405c702f2e0ef2832e9b8a0ad336729a89d2f952013"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "2f632c435fc9e615e746b4ec2fe0bce7a1e26df1e1cc528d298d0fe674c2879e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "99129a90e50b025d9f621175826a203938d08a90063bc530b7bdd75d65562e15"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "0b5e71642c469e9854c7f18b66658ed729e81740f091e5a367cc40c73c96ec0f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "37e63e562cb3999291d7b028173b69e54ceef7cdaf7b4b5efa812643c6fddec0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "f7798bf31e7ce2f8856a6fb3cb56487ca85c7c7d00119503e2815296d938aba6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "3c3c7b0f44c30609051b2e1e7b021a4dc25d3722fb7ee274cbba04006aef1757"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "e0c37de453b4a5ba61890c9ee6719ed4e770f2c655721620805e7c56bf24f730"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "fbf1a48e4ce6c4c6deeabd0b1de5cb65491a0846ef7e58babc72b241812bfb0b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "81fe4ebad050fdcf73401840eec04dd203992a2613dcc06f0f70667c2ab16823"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "48cfc07b4813151834d77048999575d1caab252b0424184932132e42e2b1d57b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "a01d7cb3286590e09e43397f85f9692c9c591eeba177442aa51c40bcf4e93954"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "52a8a0bcc35a765a61027ded8e3d89549e09f83b9a1d91cda561e182cb55911d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "b36112afac85b6f9a4e8aa43ef73421f4e79190aa0608ac2934b03973232a4dd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "35f70f9db4a5abc468f58e15dd786261559a5349f52d5cefcc984189b560a39b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "853a204c60b56640129c0ccd96416a70a48dce66037eb2605e3c0d2e555e8ee5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "14b5907c8c0a2da9ab341631ea047c898a40e3e94d52aa465cf11499113e2719"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "7b3988a1f3f3bbd578ef65888e47afe5de5c336b392e869e10b69529acfbc1fe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228424, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "5a9d985a1a4e2706709cc012235be5cecb2fa89dcacdbf851a001f7f6d075651"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "f05b431861eb2115f6e0afdea35a0ca0ed5581aafbd8760f5d87e80695687a0c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "ebb9b0525b828e5cc16dbab6cf0f09b02db95b2cf49b287cd0dbd4bfd56b13ed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "42e078ddb0346293e4fa8f22ffb03ffc1bd7bb379ad9a78786fac93c14e2a3a5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "3c7147675bdcc208f6334a14432da0569414e93a436b760dc7baf8a28b756191"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "daa807fc116001ff94bad23ec480164aa55f2a79c36fdbb5e9b750acee50d827"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "174bb063bd6a99f8aa290fb96089a40aed29adda2689c882c2d9f44cdbb61357"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "529b7cb997d9fc570e0495fe11fd2f26e98c45568e156abacab20d29b2467e5a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228424, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "49970322ab9eb1ae90e6ca2ce3d7f1bab85b8e31bcab02c61d94ae8ff1525011"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "a40c0f3815d4bf651ed917f2ecf6b547c883e0d05d782daa25a4f1b44fea8f00"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "0999468437aa1a1e9d2dcda3954f40ec16c724d605b5edcaa5cffd7089b97bbe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "e2d481e47ca1c66c014bc5a428a30f77212bb6abb598f4b1fb6a47aa1bce1d69"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "c0dfb3a4a90b34addc0e430247bb18202d98c55bd4317100b58bb5e43c8d2b18"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "977d77f4643b93dc4c065cc6390ecac3715a4fe1c6416430508f26cc489ffbb6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "97dfd6b87e777257e2c6a5803d2a60ded875801a1722e45a2f053f994e3e3d16"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "f9cb2db87e3b2760fc477906db51b49cef9dcb101aad129cae95a9a9388bf5cd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "c3dc973e08a0d351fef181a5b5d20f3a281d0d2ef361a10fdbb3ed87115acecd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "a30ecd79002f1db7957666bae3b5f205a4bb54c03cd9483cb7c1f0a29bcbe47a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "5db6e1ba78aebd9ebe9b0e58ff3a97d80c7570122d2b31244fd889ed4ab2c565"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "e324d7244c9cd40c841bb498a2e97a31d58e5790518a9755e410aaf4981605fd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "9681e11d4a6b786f50ff1426e9ebc07ee26b4fdf47da8cbf2e38224b2b93cfdc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "44406b0059f827d258d7bf107be4d2ac706a8df3d5c6a007cd0c584a38d53bce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "018b83ebb34f7e50824c2d752832ab5c208946770e656b8ae0fbc355c1de82f8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "786be9ded6e432a8c0a6a02751ea31491cd3eae58a454181b51717e1f6cb32e2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "c2af5180df0ebf4e7611630c827374f47d17b94897f57eac2932da3c303f1a41"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "26c46d6ddfb7166634c01152c62d92fef0cae8eb003a194c5c8a688bbaa9c0ce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "81952a2df62f5df7d2ccc4f7898218102f0f6fd0f74ff7667669fb6b73f3b026"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "9fda7b55057adce4c96ce361e8a02f9396fcab737f8e1f905949c48806ab7262"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "bb7696b3dded1e49e1b3c39a38166ac4b9a4c0efc07706f329bc04022caf7775"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "b831ddc0ee7d2503d3c910ff008e32f9c1fd3015e4a6b31f66419c01758665d6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "72cef4ae3d51cce565a2c55d42acd634640a46bf6bccbe9ff8c70f3bcad26c53"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "dbc8594b01728d3c41b20e48e2239f1844cc9e8ee54dc24788289aa5f0e08093"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "a2695398a291c596780a3b02f89f177e281e797f20daf5ad5ca722f695817551"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "ff0b8f16f4e901aba4070d351bf9904b0d70010a43b61d2b1e75934af87d5510"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "b93e49ca124868561c24dc26c199dc24c30c10460acb702a28bc251d839bd0dd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "6a3e12de9688daf717214c51edd111d61694e0fec35a064316cb8553eaa13618"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "564f2a9d82b5c6d0f8a74b409a989efd25d613aed13e3d66f692fde9edacd421"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "0e326f4867ca34d2db9e6b2e6d8991e30ebd78ec201ba82658cbf4d632a90b2e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "06b8db9845bbed697ba124537891bed8b760cb3289527c6726bf623a533ef119"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "9893d17294372da18bdccad336cc422d0d2be8b59d1d48c48250ee167965c253"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "cfe58d86c1bde79dae61845836fef8a851e88ad27d340e9a81aa19f56af306ad"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "d90a7dcbedf3a56e4983f364b89b08fa09330ec47c9c7bc689c92f2df985988b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "012093a180467a1fa258b22b8bfdab1e0d65358590c24571d93fc435b7937bba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "65d0c84d04a544ebcb12ec593379c4c862348c4c20b3e90acd4ca2af9a30828f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "4c4a84917cb9b1a294893c1979992b9868827b2e6929b6c49a93736de48a2134"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "c6ec67a9c8f08279aafe22b108bb4726376ae199380fd06ff2e55d6a0e807302"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "81070447da057906e6c810a6d513c876b407e92c20362768c81779700da42e1f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "b1d192a509d5b9f8b18d600be438d7f9be365201da8e2d80bf91063c298eff38"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "ea29a753320491ed582d21889d403a54cfaa8e6fac479e40844b58ffb8410ea7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "3aa5dc6466e96f1edbc0e86e725db158de7164fe67d3e9876cc2e3a24d9d3043"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "ef0126e0f9ca4ddacf9a32ede6a42c4e3bb5ec65f8b177ef461e945548cbeca9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "26e02a78fdb9ed662036c3652f0c7402ef5f0e06f22c003dafb56435d34d43f3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "106ef22662b2edf2587f42528e4bab3cc853018a858139cc7081761671692270"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "49a0d03987e1ee042f1ac5182d56770d75ee63d3d429aad78b2f5cd48e886b5b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "14ee602c32b2d2814bcefe7edd819cd6735bae75936a7a3306fee34cf3ef0d54"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "8013092a3cc8cc757ee233f9a23d678b7a243ce37dc4423aac14f9596001bed5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "9554caa3bd72604e2c813403d37849857808b3df95b7f68edfe6b5972400a53c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "d1300bac9fdf4100ad49d96b3125c55872dab35b7f08a9145de5156ca996612f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "cbfe998c9e20eb25d4e0411e740f41815025667a955081fa0ac608722d9f451f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "b80fac832e71cd70c82e67c99d6de608a878a5310c7792f068602e4b5ce752a0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "64c5ef0f0ef14fa5b95f04f4abb4b4a22e71c26123363ee1b1718a443509acdd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "5be74eb8a192854bb8b19525b97cc3a9dc57e850d31e397518b44530f0fb0259"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "f9208b73c06ae6c3d1559fc499ee39001ceafb137988c8451ba2106aef4a12fd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "3245c407908e6dbc045b0aaecf210df1fa056b412d2e856e8d85bc4d9164c4a5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "7ed442396063141fbb4d7856487f565858e5d5a34db9f77d3a0463e4513cb8ca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "a854e372606f0e36d42faff16c6d6045c8c500ef9b98b2b4da4322f18e8c723b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "f5ac80fe67798b37c36d42a08588aa842b8c55fc27de1887a9d748123e3910bc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "d0c7ef666ac88443f7952f24571effa40d49032907d486512c8629289f6c9ce1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "9d9381c67cb896bcb307b52072a19aec585c279efcc485ad2c38a986a77b727a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "56a449cb63975f80103e0b31f2ec8748d78c222a43058e1e00eeabc345458d83"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "a6969c8b57d3c95fa718b5129e35cdfae7f9eef33ebe83ae8cf1a113886c142b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "3f5a2e6760bf51a645e11c8cad1eafe23e08ce0d2eb2db48e12e26597be0eb4d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "9c1c7c9e79ec4545c662644d12da4a07a741b1ee4f4d1d86571ac4f2cdfb2be7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "370730dc2ab0446138426d41d097912c8da9f6aa1d1d60cfed4f4fe57c194183"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "e80479396149cecc4463cc4434db44678fab3e09f4fc74da91dec9b741fbba8f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "8386b2073ee95780f2a7259ee5cf8dcb3d34f298470a5804cdec4e336fbdf933"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "ba6f8f4c9886f39d2764705f5c86c9965084a1cce4a2a145ea4e88f9a2d0031a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "5ddf82c8a724a0923c5e2730f8a420ae40148c126c240fa4598d5f350f4629bb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "ea7a49923029af1199e687f2e02d480392a61863837f8fb85ee26fcef3737e26"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "b904e3c479eb685334f24670f31739d2c2580011929a86409fb82904478ec2cb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "2da83046fec95a1f3bed90f8b4d438610fb5fb7c71d5e51c3cba5f153533113a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "87ad98e8d2af775a2b1a36974a65552c0fa0c53aba189ba8f37ef51cf39ee4a4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "48dffc0e44aabd53b6a711cf700d68caaac3b266753d6c9dc1c1d70845a9833a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "ec6f0e2442b3d1350e5df2f88b11a0c3cf0b3590c6082b9297df06f53c143e43"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "47e03c17b6a912acb52794e08ae05bc42375da66c75da4e643ce081d9c60e430"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "dd1562028ca9706797106a3f923fc9e41db9dbaffefebc241691a05a7a458fa6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "657e680f7dcdfa9a527a6d9b5792d6f89c09016bbd85733ab3616d84ab02dc81"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "bdd7cd87664815e575e5e09b0e352baecf98df8e3448188eec7560dea1f0001a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "927d8ef5b8ff2d65930e16d7c9581b90452f771741d8cd6d59603136bf1c3f31"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "7fccc07dd3ea5426756284b46fdaa0447695dceaf126527d069359d0f47178f2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "f19baa839a783204945fe1820ae081dc71ca6751c833230c352b1d50253b7b4d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "ef3d19933b7f71c0bb9b42d7c13d8ccc318cf3258a5b9c688363dc659332089f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "fc16957f6a6dce73ab476c63462a75031981efdeaba720457344412bf2773586"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "c596b15393db1e577ec5858c2c0d35fe8875fb71e7075f06168cfe8d5745f276"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "ae1a7fe8dedafe9be0b30d077187f35fa8c8b87383145857a17e1c8f5ad34d36"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "1a30c5b168c671d3d1d646b9f87f70d5dfd2f64116134967054760c0d1c7e855"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "e5df4a161e5f3994ba8bda961fdbbec792622b20ef003ec53bbea4fbd7efc49e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "43d182dc7148f7ddca2cc68cbe7fa1001bc9f3b7aa7469ee42f49b6d4dfd16eb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "44daa7c43cf1e8f2e337451db2f3898291b3e9ea345cc4d5a31dda54ec55b000"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "380a68a159201887412989bd1efe183a0c99d9af2b9a0f45ddc8606f1eae3051"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "b096b12a7bc908904db744c33546a6e556b2eaf3de59901ea28c5e6748d87c23"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "9f4ebd667342996496e63e1dc3bb18433392cb9476f8d9f492f69d6b6b9f1e8e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "955007328392c95f0819cd644ea8f5eee8d6e2da89c75cde4c1f1e70787ac5bc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "2f3ceb6dc2c5b5245047991416443813a071b566cbf9b3ae2ccb1a2077b4c538"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "c2a4aaa6fcd95e3a8e465fdad34e8fbb27ef9ceff640b74519c2af692ce03faa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "fb3923e822ab149996d97f9a8413e439b6b515986f5fb2a9ca9e37b3d875449e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "37b42fae5a9db78d84f3bd841c9c764444b310fd2d42b1801c07078e7e8b8dac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "5eaa7306b8e6264cd0fbfda773288a407f5df0e36295ffe901eecd7588f33a99"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "c0bc6f1c22d45145fc2433be352b9182b94f38e441e4850cb96ceafd3202ebf7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "a154972fd7ea2245391ead429fa40e2645eb0e736cb7454840e48a335079857d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "96cf533d6728ee9db15a7a2a112c4404c8f199c62e6b1e71ce93e1223ffd08eb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "293abb1114582801833bc50e6d6cb42e0bd7d5cba72ba3d758e2b46f1bddf03c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "d87237d975b963cb805dcda451474f86736c98f5dbc1b2894c96971222652c66"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "816ca1da07f42b3dfcfbadf037b31ec8760c30a27693a1515c0b0c10593c9c6d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "3423d0e4ee2c008cec5cf61808921bc63bdd99ecd546b537e7a9bf61751049d1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "c5b065778b41b177fa82e864fbc78abcfeaf467b319766d5153c8c53a4e149ed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "163b680066bcbd95d812b204220747df621aff0bdcfbe5ca5f16a150bce71a91"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "b63eb176a9fdb9bbbfde1da4fed6e4bcf8247f9f9804ec17e0d774b779dc4ea8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "aa3339a4606bf65a37660ae1caafa6be9a8f0bfcb4441c01c5f3b1a0e8d1420c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "7dbc010930203730d27829795fe6a39247a48715b77df03f884390538ba0103a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "0f338b09dd57b879508be24786eaec71e71e077c271226af4b2bdda6ffb44bc9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "4bf4286a67083be3ac6649fb30308fcb4f9e459596ac39a2cd938bde5c10a753"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "cabf02b0d5c0be2b266983831f5a14aba7af7be2d91fa22e984175a70f847237"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "447058984a70764c9388c4a58c9e28bcb86eae0fd4c33f9e031f06809614c751"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "a5802e10dacada77bd60df937b4e6c8c9b0894d138384c7cef2fa2d3166f5f65"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "5e027e22ceecdae1992056e3790695d4e65d8486a42974f13fb7cd355e8cd273"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "654649bba554ee4522d8f6d16f6377cc7ee918f8f1792d9f00b71bc9ed78ad6a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "5fbfdc9da046673fb5178369e9e77cd2bcdde8da7d3dfe919cc8c18192f8809c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "c520ebeb1a98fdd1e2898724c3ee8e207c9c4fd9764122cc687d9fe0a26a7e1f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "975c7a6da37409097c8efd95a9e5f2cd685239728ce67c9f4d9a8562c8522724"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "daccbdfb1eadcbcfa7070849d2d6ce6b770c501c4f9ea8b68eb384d3c3f22cba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "f304de67f43899fafc86ab7fb32275116fc94bd845df64cbbafbde6b6b5abbe6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "45c1f263667b70725639b2f42fd9cd59374b1863c83eb44bb1ff63a6b588a844"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "a55e0ed9e011e7512f06e4113e866293bddfdfc9b3122896e0dc96f3f1452000"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "579e603aa1bdc69ba4608fb77ec8888e2679fb658460a7255693bc15983bedfc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "05a9bb62bd562641085a2e05b9c1cb2ee05db11761898cb657322a2806225f5e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "794250fd1e473755f0091b1852a31174396708507753a7e974b33a33bde342ca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "3069654ac8041d3223c0c6715c9fa613bf872ccad38d2e6f2b3ee201085f9665"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "70a15101b8353dffe66a1ab2e668666da90c6dd28629302ae097a1d99f23b820"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "cff68d876c59c380d8dd6000f2e966ae3d13d525a9acf242d9fe056776605e32"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "eafeef378ebd4d28830584a34b0529fc525d33d9d0d8f8054a7477f1f5981f2e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "5af9bbc8c3be45a61f1052872fd79793f7cf185ff80039ec84a860330b2f6bce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "77258273abf64b2d8cf48894c34f0ca44bc7e729464174e29ddc0f5292d1bafc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "702edd9861e985c332aa1bc8b14415fff2829681c5c475dfa87fa61718516958"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "10e45fa89b781aa578296603b18ad63c928553112721df64f458b0390b05be7a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "a8fa66b2b06e8216f0200411e2ae936af04751fd9a4f1ec4c9931442a92837cb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "bff28721a5e20dfe84a60d6a79625dcb917e1b711fba555df4994c6b8dad2874"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "a2764780fe74045c5240a75d4f7b2d82af3c2d8f5f32a153c166390b62947e66"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "3438d8d1a7b8034d56bf1532b3e0db890bf50ceca7731019053573900d33a0d0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "bbe073e7a71b8aa1943fa2b40d24fee8b488589203b39261a77830b97c06e0df"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "c82d9c2934bac6b19283a378b58f29c1538f8346754ac35bb5b25661315678a9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "ee798d236de242393ad217be278d035e2373f0722ebd36f9bae55f0bc70b488a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "e4ca1f2264b4e2ade231199ca8d80238152350884fb50338ad8a54934e2b871f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "ca9e24be987b374381d1d731124fe63abd17068846f6073f880be0c4e2da60b9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "8870a80567341578d0444dcf396c9a526ea2567b56f7fb917d1ef084b84e49cb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "6da2e938ca6f0936153c134515dde0d6b16257e07f74dc096c499dfc6c7812a4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "879159a2999e4bb76a94e95c711ce3f64c5f065a0add22ea4725655d57c42b54"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "d88d8fea93eab36e0788a31959f9bc52dd3c8416f34fcc48228e85df911dd6a0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "683d2a2a7f96683d1acc578f4bf284efaf77f6884bae6d443ae3c588321de772"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "480713fed0ce23851a75f41966d6643adb0d3c80b77efcc9722c0d21532eceeb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "16a8eb2d76567da30eec6dfd350ecea7aaa65322e3e0809a5f9721be3b5d8f14"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "af8d33c68d051b72dd9987b16426338de42960213671d8abbf7169b6d002f2d3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "bc5644f0867e35c153c76a8404d8b65b31f5cf14f7aba13f842b163da3b6b566"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "ed3fc1c10eabc0e4328ff8641a3ff77f86d27e03cc20058308cb9814f182ed68"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "4dda491ba7f27f0f12958fd5b9f96b40771dcd9df4575eac265f841c14159d38"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "2bf5ee8fef314cd5bed1d2ded9e72cf7ea442e575e81f9eb0156d27c01dcb216"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 191832, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "4411335bbc94be67868ba795c5923c30b6c0a48d7f737ae2819716c6c747a796"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "a92597876757c8b431c451d1403d16371473220eb8cdfebdc90471b9615762ac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 157008, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "e57a46cfc3808aad8691f85938c71787f89efddf5c7cbdabfc0bc844948ef3b4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "d2bb02a1735aeaedd01a7b0eef1bdbf876dce77d23a38096d2a1b37eadb4b69c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "b94b6d481bf4e5a715a51373ac330554b4cb82643071c0626eafa22a6b9295c6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 165296, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "b0a8d6e547379d37b67a45147bc1908ce953964d7d4139572603f40e0c367b18"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "dee7b1bbafcf2060aaaa9739eba0d35b137181c1443cd0392b9d1d3781013604"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "e1ea011632b2a68474708c69e8b2efa752760a18c58985cc0e996a68a75f321c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 191832, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "815a330a8407c9cb1d97d40b3a602495066699d71fdf72829523063604e145d6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "ee7f457bee623cbe35355ed0b5f8acde1fef2fa6c4d7cfc2dbeba41c73c5fb64"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 157008, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "b495e71a9c76d21056ea3e8392a742c7a353502ffc584613812aa7b96ac01a98"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "1e1f7d796c72c81ff8e73df4eaaac369bb842aa4b74a9b029be6172574aa27bc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "3217c58251b9d5845d2c206a33f67846936afdc2c59dbe0d784c9fbb688a29ed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 165296, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "2f11d0263d1236af087988e47b0f79267f3fc90245c025086acb50eba780ab69"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "a1841b99217209e3a8f11f874c623f2d1661745751f54b758c0185514d16e479"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "6a949921e2bf7ce6d8cc3f5f9eed785613d4924e59aa000b82ae9f56c8065981"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "4a55e9e6dd75fa9fd8957a3ba01d81982dc26b6251be30f3c8e31b2dfd1ff631"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "46aa7125b58bdcaf9f3df4d3fbfbc9bc5f17c263921ba88be026db01ce76efd1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "b58c6918be64145b6137cf928d542a865f03fa0802a8175d52fc413ccf406235"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "503b5625f357b8ff8e045526a6cecdb6d72c6b83e7f167a634cac693243fae09"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "331edaaca12c9e906bcae5b14cac2f529eeb8400b1c3c3baceeeb25d25655ef4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "9c1dbcb010cdbd515dea5392ddfa8c8b54db05697bc4067a4eef51ce639d0199"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "08e21d84316bf1bbe718fb8b1f0c794cfb99962d908e854c4daf8e390d3fc28d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "38126127c197eb2508eb5ba24eb81195d2a216a853d3dd62705f9ca66b2709cd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "ec0fde1c6dfa2745ab3e50e6ae7b4503d2a331dc44518c51ac00ffdf226928cc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "9d648c05ad61545ef230dfedd1dfb66a8c2214cefd65636c60e6da6551cb6f0f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "65fdaa453567ccca54e09043f844a23d76f69c59e303b5df62c841f84db4e097"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "80f79c095828ea8cdba4d329f19e39e0094fa250ec409687f3bdd3bd81376c97"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "4cb33113f5061dfb87debf583b06a826eb749e5bbb4b080b31ddc49559b93496"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "ce4c8302e1ee9b977aee0cf22e527fcd25335783749a9c4b616d0dcb4b64cb40"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "ed41c89e798f56b1eb9cc562384a9abf0016d84addadd6f3dcc4cdd38fa8e639"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "f9401c12b30942770c77c6aaa845f64af69c9fb0645357c6a73700b8904a0f0a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "9141313013612b8e73ab2c0fdfbc45382116e7df5ac52e068006d450d1334202"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "c8e8e65114ce1dbf5fd0004e4630df8310f5975d322318d80305c702381b0c18"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "b00aad1abd71a5295f457930eb40e39e3f236a9c54c1c866bd02a490c106fae6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "37fe5ba384ee96cfaed0e51c1f12118291ffc55786ec800748c9c615cadd9c7b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "62d7a99f2842abee9cea63218c34cd5d77a5d62fd2b7ca6db18d0301cf30fb37"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "ef23b189aa69bac5331121b9fa9cdd11befc6803f4dcd8cca14a75ffab4babe6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "9d127062f5794df746041cad6a9d407e5c8abc5a8858a89e328b5ce70bc3aa67"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "2792868f8312ce39c375b5d528c16694807725e9d126314c49b7cb67188a427a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "36639c5b8a2ee503641889cf8bab8b82bfd83d9f3fa895f6cac13368c05dc943"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "128474be6df40ff6d49e96b8998b75974aa5a9a4e21e709744a0c61c93e87f6e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "b3d5f70e7e57de407a6f4ee8ed52fc07f215068fde6445edf34a57307187e5fe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "ef483934cb0b8b491d21e139ef764002eb575680aaccc5884be3ce9df8522e06"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "b7633d4b3e5aa376c72d65ed80605bce33df4c3937093e628e04c1d206ff0c0d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "08dab390b00b385b2203f9070bbe1e91f15834ce638387ebed2c04233e13f7f0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "15aeb727f8a1e4df0d1482561dbb50fa17146678351c999414f3b6ccb8e22d71"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "e6881ff4fd21da6d8ec09bf95e9c9173a6df5ae18585e21efcd12ffc88509325"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "0bf855dc3b97ea21529c1214b42a043326cc39bd73de4d42ad8f52b24d12a808"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "3a2e05beb71d96843b6d24d2de4111823f5a7c2fcd4b25f1be3afaf28d7b7357"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "51b3bdd960e36599ad352c2347887015459042d599b994bf90beea7c34e4b0cc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "faaab5e1a693195abbdc00f01f9cef6667bc08acdd6394ae11431144cb075df3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "c89728635e9bdc224a31b9d7dd7d144dda6c5b40dab966e3e5c795ac9d0bf9c3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "276914dfa1405dc4986fabf5686ab67f3eca6bcb4b2d683b79b730d48df553b9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "9933f7f213f12d32fdb2362530593f05016ff787b6e49a37c219f777b90e27b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "2ddf1c7d21c5e65e499341becf032388d12ba57a62e71b314fddf904b982c3ea"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "15e251dc52eab061bf458642598f9137abc16a2d03a63cece73feb63198d9164"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "1ed991c8192002fe85040e23d0d8ce8e21e553c6af62c6ffd8726c37be3709fd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "6a0d75a5bad53c485f8cc9c42c9ec06c176a4eada6ddb4329a857c40edc70c41"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "0b390dfcccacc90b50715362516d66af0b9e750c90aab768243aa085dafa11fc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "ee7b239af22c74b11255e692094e814a65ba8fec4fe36ab3c4573117a7c066b8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "7358b55bacf4796d72fc7887d0d10f89dafb64fc42b8d6f16e71cdfc00960a3f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "602a13b7b5d280467fb7e9996d7c587ce71429a993067dcd1f575e078e0e603a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "120fed35190c69de6288eb7ea0f68ba7678353453d26a131ce096a54b514828c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "efb83bf4d13b3b0a96f86ddb31d9a57c057c05ecd3a0c75aedb614170ffbe442"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "b7132a43a8ef0b5e0d65717d520a87a03c599a061cb0c781af791a120fb4c9ef"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "601757b14bb8704bf961722b2016ba3548578ff5a84fb73c3de9e37a684c5bc8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "a4c23674bb4f3fd377b7fbb1d3f40a521c6e6ea60bb5adbab8129e979144342a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "14b39276550c0d1e35e4364a2975f2243502d439a1d8cee5bcebcb87a0c68244"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "4a608f60f015d0acc821d85e3eb72033088a9a2e0abb5ced87f45316b0764161"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "07d6a2ac110a77c761559181e9260012d7d61318e458fd76334e6c4829434468"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "807805843089fa31644a7f5bae065dc0a7191c19790d046b67e74de5937e680c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "0e9a249bb9e64643fd6af3ae5de3e3a255f18ac778ef2d20ab1e1b6a4502176f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "d6669a675b85e4161c504acbb4d2480c2c3caa494a45a6fd14aa9db6beadcad3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "ed2d8f00da9f0bc7b42006c19e8ae05209edbfb51b08d841cfc5e0c9edd500dd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "387fdb02ad028dc6c2cabe82c31292fb834556cf7b4f05db3c7ef9d66fd655b1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "e0b4775e88b74d02ad9d72e702f92ae3e214d5022a2052d9bd4489d88ce9df94"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "949240a540cf3316fc12c2ea15263d8a3452760c11befd5f913e6f905217db9a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "95ecc669d959f3edb5d5f067d30d84a61299f8b194c0873773ef56a9f30f2eb7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "3110e459275f7403951cf6ede9210032d0f175f2fec2bbdc9546df18b0c90271"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "0b4fd16d43789a19155875ac84a859429c78d5a2308d699be5100e70851742cc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "576351a5917597114d78fe6cf1e6f11e6cf1baead96274a7ae2979835747469f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "2805f9d0911749629da63f481d01ef0788fd8ffedad78719811a64e49c59ab6c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "d32fd9f96963b207e43ab8c52e517af22df5dbbca54ec76324ab1f36747c70fd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "f2afba5dd1d47fc69bb360f93b9f2e0d74bf657adc5c4d1043ce8f1f9c03f127"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "1e7204631ee4a2bb5a54ab7f8052328bc9f39d066d8f41e72c3eb48af39526da"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "286d6953991d982ca8ad6bee29bdf9635d1c26f13c91644b041884f2edafebed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "615ffe16585b9644e58458437f79efa6e51a502daef663a218fcf1124dce98ad"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "37ef687c92ab652bf94ca35146ec8858e76bf7e556e9b608be2b4da49fe1589f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "7e7bc0b26d56a44b8bbe7247ed73571d317113d4785ea664eecc577cc828ce4b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "d3c008dabf4fccc4f801dc2e12527a8ddda045496dfb428004ae41c954c91d9d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "b32b360ecd80a42e424957610078d925c8b005bfea74ac3158c0f8db74f3f725"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "c5eb706aba7c9ac2c1fdb5946b68bbf5a31bb09393903d17fdeab1b410c781fb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "11c79468362a90fef8d936cc05019c9ae25f5cad63c3232c98cd45ab816b84b0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "0548c055b8767185f64bdaa7a2f902dd0b5b6cf8d954fb63326a5f8baca3b8f7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 115024, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "f8eebbb8f44c2f21b199ac50eeb8e53e79c84e5e6ae0003c6c47f0582a9d7498"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext", 114928, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "3b1763cced5b3f78aed30e79663bf3d97d2be054a6f9e397c74845948128de08"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 115024, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "388f6209ca3f4e117c19939d4b7821e8ec73e94a409235f1cc2eeb6250bb3aa0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext", 114928, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "9081e9db5f1547d677d34424e8bca51d6c18e962967adc377488a9d29a1f9c38"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "e68c38adb871a9ec808f5a899d8def3f39bd8a797fbae0d99412a37b99670027"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 115792, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "64ffdb3d307778a56c83d79fa6d6e1e2955b63dbaf488c840bcb5459653b5b9e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "1d1a603240f8c9025d5e1c0e7dff3cfa5cd8c9e7947e123a1aee79465f6def3e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 115792, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "adac46f5f1ade8b07a22f8a50393f3c15967a5aa3ad43de54d0a29e59b12d156"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "3e7926c94680f08432179f45c1cdea05ad1e2d6617ebf735b2f17fabbfb2987e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 115792, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "b715709d8e974db53cdb8822e489cc9657d27973b5aca09e266a377f72577a94"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "207379330fc8124c98e468e9787a4f82174254cca2a94467eafb827623cefe7b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 115792, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "5075094d23a3ad2c026dceb2b1f0d21ce9fc4511620acc7dd56b531579474ccd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 115024, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "d95d1f634d06cc2fe04180440f5fd52f1386f2cf72020dd8721824a17ea19b08"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 114928, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "1278d4fe49078c30b04d9e22c46414500d30f0c8683526feb591b468f82a998e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 115024, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "e9ac0709fb2375938d5ea31704f9d9ac9272a86d1cc15ca8f36e9b769b9c6a12"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 114928, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "f75f088af97075aa3106cfbf7286e6970a5b16f78b4a09c5b5eb2db4a482f1c6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 208056, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "7c9bd182e193851b11223190ae6ba2d38f4b298771f06752eca0204695a5ef73"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 208200, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "2cadd577184dc1c77f36ae130c29d36bb65d707f1bcac1ac285b237b1185abad"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "91ee781186ad2e23cec51cffa968eb0bf361b66413c25bfd701e5c14e548a2d9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "a51200fbcfbf8832925f4122f9fee85ec50228f8eca37206cafedca6eb5a3b49"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 195400, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "2106a6efd857734562fe3b63af9e275d53b8786d2fb2276ea359f20b5638cc8a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "cf18d9a675a7283c79ed1047f2284ec8a9f6d5b7f7d8ddf8ad18039f8d811bdf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "9455dec4ce0fea515b7f962034975f81a9cc33907d210e6f23d39a83c6ddaf2b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "e1edc880d09b0085a11ef7c878572a695755d4c1002dfc14a7aadd5d78b5bd25"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "6d0a2c944f133c83ec42ef11138f5a41b7de66645b21362f50d078d9485d3e44"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "c6a7fe0cc0deae6b00222b536233ade9a463b9f840a90114dd054705bb81cd88"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "587165b41b7042974439441920f14f41f9648035f748401a91115dd42abe5e5b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "ef8347b6e7b343deebf222571aa82d178b076ec6cae9311cdadb9114b33f9145"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "c57c93a1b73cdb0ae010a7ea6607a6ac2b15e729e96ad335b5c90fe2cdd2eef7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "abaf2a4c8387dfe7b022115116065001c18eabb638db42822b35984029b52f1c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "3da193dbcbc6a0743f610a03aa3ba9d4fa191cc4ffda3e32b4754354de6fdb7f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "41b01099c60d7d4cd6962168e049d2930aea5aefc92c75db04a2da10613ff455"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "db241ccdc01b6b85521da343e1162fecf53989cf1298bf2c07f98a5778fcba19"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "cb9e9e56a92e5c1a3a5864192794386e3ecd855b11d6ffcd02aecedfe867c2b8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "a36642a8cc85995e4f50c506c8ef41f8c55642762962c7f894d69bc39a2ed247"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "9f0afadca5f5a2f0b72ccff1440001ab9baeaf46dd2fa734809f969786a408cc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 208056, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "6178cedab11b6c210c03e6a39a78b1b955c6ab68e0aed2a5acbeb9f098993fd5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 208200, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "bf18852d75d28e7296e7e7c8789ce54ac24719d98023be3e8ccbdbe0c70ee6f6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "08cdf8b419b086efd678ad9c79d0c1ad96aa623fe9701c886b331f4eee9ce237"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "1e510aaf3d4ed66c10defe8e247ad491eb06d4fa60484c7492e5d180363c8a30"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 195400, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "e2036c7cbe896416bd57907650b188d7098132b38c0697220178cc1c668f348a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "f940539f48214c7281732ad7ed5ec0319014a9b651cc08318632668ed254bbc3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "3ba991e7e363e31a12c3a50d3bb1c5ea8aa51202c72c56ef8a2906d092ea8227"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "a8ca7e168d2cb5cb23ea478df9f935fad075cbd23a05a6c672f87f04b926e470"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "f10c3db6cd3e51a87a8f23947a42ef454cad6397aba1fa92988a3b1910c7f199"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "32825ccf9d63045ad162858d80ccc79fb53d1454853277f50e82c7614befaf46"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "e81f8868f5af41ed570b1c9789da4ec37633a837b2c669c9c0c3aaca319308aa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "d9a4f7580ac5408a98e42e21422cc05704c58a172ac5828a7e41f20c74955297"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "aac9b6cf0d4ac2db6312b6e76013590e0dc0310b0af9eff82af551535577fba2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "45a6bcec54f021f64c056d3ac1c19732ae36d380217d76146c64b1442a20610f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "5161336ff3e54d3897f28fdea9ca6bb92b6d452ec94cdcbd14d77cf76723f256"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "6c94b54fd30b4fb5529844274f685489e16be771d3ab295a81d53724b643add8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "cd7901bd9b139f7fffab4b89534bad23be9bfc02f86a4b70a68d3fdf89f1271d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "e1ce6f0d524080bca47530fbba12eaa5725d4504860eb9851d71cebf7e703521"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "03dcccf333b9c7baf4ebff02949add37fcc9d531a8ef59f26aa16bf73368983b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "94e27eed7a545c6f53942cb01520b9b5240860010dee098b2237b7ceb03b401b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213160, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "11883a6c80eca92708cfc92d4b6669ee79845bc6cb7efdc79e46180bc6c00bcd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 200488, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "fe44514ce03eb4dc0453de6a3237f377b17934c33eb0a01e606b3f9245f28e0b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "0256703288c61e37706c4b507792389e237dd8c8163861666d5de70c7b31e5b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "9030d672085234484d7ebbc52b6cfdbe5efbe6314b5e00b05ec66a646d421fe7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 183552, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "5cd7b2cbe8cd6a28e5af0e6debaefcc3d6e72aebac9a1f937822a9dd9fd14574"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "558d5b68d6721321993c373dfd6a078d82da344c755ed77f1483d067cef75347"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 168832, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "08ddcd441bdcb05be794e7309ced858371d77cd05845d48927f13668fc3fd58f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "0bbc9686641cdde9d4a090378043ee7a99a1c53140c487552c38ae28d5a46cca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 207544, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "6b7d1b4d68aefaf65c3885eede1ce7a5cb587cb3cb6e0171bb375e83bed06262"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 207688, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "4e63a00d008ac31a0cf8e2a74f7028254b08fd7cec185e7b057f4a57ef576bc5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 207464, 384, 2, 32, 0, 3, 64, 0, 3, true, false, true, false, false, "9875f38c48339529e13680ad4cef24d58ef8cfe68bec0bcdcdcd98e4445b272a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "4df26588008fb448d2d0e5332a7da12d5ed20b92a1c5fcc5fee4b9c6bb80234b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 194744, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "96e3f9c9d95d7df4b48a8f47e16ca28efc71ec28a8a1bb2decf59d6fab727e7a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 194888, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "fbd66251434cbccbda2691fce02d1838b00e85c5daab22fb6411fdd149038f1f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 207448, 384, 2, 32, 0, 3, 64, 0, 2, true, false, true, false, false, "9680ed5f74957b160c867e6745d8ac4040d7ed0d21045122aab6bdb54660f7d5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "78a0188e57cd8f2d4ee3c1b1bad25ebfd879ca969d6c3164af3c09eb1aa11b6b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "09c25393fd92bddfcca011da9eead8d50dbc9472071917b1598f7f13d71f1592"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "42fbefd704738debee62cbf6192be423562cd61414c5c6fe4c11c5d9aca56176"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "707c49957c66e8a03c6d33bae6f1e87f4770f4e3afcde4c512f96b990d76b8af"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "d2db9004216ac15d4970799b5fc1035bb06494dfff581695798d3ac6b8c25581"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "9f6d1b79bb715223117fb81a353d68c6b2e50b888645bfa935e714f97cf0d8d4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "86e6b974ea0d8a1d5321b21b9c137d1a4c37b90f4dc20d836afff71d8513d4eb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "c3255e4a6a1407887d496059aad946f66eb51f28b16c1391a24109249e61ba87"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "efba298683b69002a139aef7179a4695779cdd8e77cd4791061f88ca2ca10ef0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 207528, 384, 2, 32, 0, 3, 64, 1, 0, true, false, true, false, false, "be007b9182b1f98f92176f43bd7af1a0fe410ead0bd69473eaf73869288bdebe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "ec221b1540dab161a8b2d8c404adbc54d4a1457aa7e13cfd27b71395d8a6d9dc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen", 207432, 384, 2, 32, 0, 3, 64, 0, 0, true, false, true, false, false, "c7b8a9d78410bf61b26449e2b35226e41662945c6aca566d8ec9bfb8e82a4925"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "fd30ebec1e3f5ec1555d0ded08dfc6b7803a29d7ad9c729d0db520252f95095e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "18f7ea83377867a81dafb1e1fd37600f9a0a0a0b72b68c37b22624377bc1ffea"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "1485ede398391b708bd10c9466d57ba603175627dee1225369f89c4b80259d5f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "d3aeb8c8250518eeedc5f57a715a1c74018305e77e978cb3358d97a48af6c1b1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "fee20a6859e6974648bbb1b42e12fb69cb1253c1106dc1b1782f90c0e6ace327"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 207544, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "7eedd8677c6a5e6991d14078ec82535324d6240d87b3dd0a923414b3e8fc57cd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 207688, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "0e3ac8d41c2ca7d02ea91aac7503b9a5cbe051d45cecafbd1ad582f5117ab7f7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 207464, 384, 2, 64, 0, 3, 64, 0, 3, true, false, true, false, false, "d00f9195efc6c59d364ff31a84ed6af5c2aa4f3bd833414b9210ad8e05d454af"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "be71df450da512a74fc116814c34562518281e73af3ad0243180ed32b9638f75"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 194744, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "fa27f6d4a396574470d17b661cad6780856f34b5f27662e596659ee9d8b60b21"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 194888, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "2ecd1f03824adbec4176df5da08348a62e93fc326f4cd96d8c44fca3dbfba0ed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 207448, 384, 2, 64, 0, 3, 64, 0, 2, true, false, true, false, false, "8a260737d39dd2ee2d613d9ed220c1a3358691f3105cbcd885b5a1081c1b6c2b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "a35e628219afe63aad861637888a75ec8bedaa47523e1999b9eec6ff7d22df71"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "fecc57d58b617489ba27ee74e8cb598d6da0d789b992fef9eeb1438cf7613c8c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "82692572fcdb84b90f71109c74f76a3ac1ef28ab579472f13b7226eb4436c283"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "b95c6a1a49888853d41663dd387778b6522c52c9ec90c4b0d15333b321b0fd77"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "45a6a65eb4c2a64cf715ba47fe3770380402747656bada7224e053f52c5168e2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "4402b367a6cdfecc2c3a1a0a2120f6d16674fa588146c3fc17d4d632b5eed2d8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "f51b2b6dac34246828cac684c834b5961dd879686f34789bdc706a1896a02fe4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "726c23f7c03bd45ced7f1ccf4bb1610c56d0ed08f06801672cf1828ee7d546c0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "5c3e8cdef0badef1ffb8a5b9f445e826c9a6508f263ad1b00d367217853df5f5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 207528, 384, 2, 64, 0, 3, 64, 1, 0, true, false, true, false, false, "c442c9ed022c45feb4bbe0dd53ca74cc191519a2de1701cb8197cec656cb2f7a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "844c7f2de654ac24977e08ae13af631225fa6282f91eba31b6771efaaee54f3e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen", 207432, 384, 2, 64, 0, 3, 64, 0, 0, true, false, true, false, false, "9016a7d9c6993b9175cfdca283a2659f41b89f94b6baf19caae062b1bd3ee2c8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "a60013743ac30586c8675e2b0840774c1b8f630f28dd87918583e5a85764e92e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "e9d0307d36fbe5969c007bf4c8afd0914688371c62ae862ba740328eb15aee61"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "e7564b72941fed9733c9ddeaff6f99dde89f039da6549320d11985a4dcd58bdd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "5d3d694fdeeb6c8c2282e61abb584320a4324e39d79386de7e2b19d1bc0d505b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "2a31c1848aec9eaf5de7d1cda991a3058a5c4c935267d2e5e034141045739c9e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 212648, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "03e2db6430e999fd36a74293713c8103d3d44e1faf0274548040629dd0a0f119"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 212840, 512, 2, 1, 0, 3, 64, 0, 3, true, false, true, true, false, "94e2776e2f861e4877124ea590cea613ef2a132fc1693c0c81e931dc9570bd69"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 199976, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "592413af136d57a5955bd84ec7a03af7928e05b9b61e7599887492317bc7392c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 212824, 512, 2, 1, 0, 3, 64, 0, 2, true, false, true, true, false, "b9a608b8bbcf18ff0811b59590e515a970e679413f69f74b306220c50d64b26e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "1e413d40e58042ca1629bef99c4c7567003793371b517dc8b3a5cd9abd15927c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "549e7659418a16abdc9b03b7fff4df0cc48aae656dce328cf80df4c85f820514"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 183552, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "698cbae0bab0adc9ce56a75919be17bad430ca50dc8eec7d8738b57caf6fe094"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "0ccf5b4f9e728afa90332229e082f736a575568a32f78935b7433ad0bcd4db5b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 212904, 512, 2, 1, 0, 3, 64, 1, 0, true, false, true, true, false, "be2c2a09a9f5298b28d3425b5efde96a6b8ca5cb6fd168079365735e3b887d94"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen", 212808, 512, 2, 1, 0, 3, 64, 0, 0, true, false, true, true, false, "74eca8a42783f5764f2cc24e386566b83982ad25a0606a560d2ccd30ab1ec1c0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 168832, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "d9138a0d5aab47740c4c30102c47d86f67ab04a39acfce835e11623bda267a40"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "e929629e533feeb0313a3f02d868fe5a256d73b28960445d99502ad82fd8513e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 207288, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "f0585d07ab90e5acc226f820ab09277a5bc5ce10122f81068d7edbf9ba34ebe8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 207432, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "ec1251cd0d1f4985af990fbdeeaed6046442fc5512f1b2f08b66fffd98013375"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "7739ae51223482e5b74ec86530bb21278dc90b5887e4917b42ae5b7fe8f71662"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 194488, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "08e195a326cf139a63e4cfbe6e013687e099d8f8f7aad093aa3e6a1624adef7a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 194632, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "ad9fa5698fc73c30816a7b0e8a1966a34ff5c83459a90eaf2da0fcf35f4a1cf3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "5bbb0259fa53fda8a56f74d789d1ad3de7ed624287fe32091c4391faf32442be"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "6d93635a3b47371fbe78773564fe6d4062348bf3820b48da869616a209c3bb54"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "fb73a5efc84c22f990b4d64689744ab7d632886fc882481da3d99ee80bab4702"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "c0f1417e9fa428dd924187bd2b198b29d5368c09165fff2bfaedb282dde11537"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "cdc3f8f85ed3ec86aa4698b18c2065aeac0b4d4a1d031f04bac4ad21771c39fa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "9156b41a0677173c364f176336095a268d807bbcb092ce745754553d1f10ec31"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "610577dcceffcdf880e8140565d3ddbb96c6896fd554ac9be45a2a0d26a673c3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "2877dcfbc5ac9119fb5337fbafcced79edb06843c86b3884792afac18d46675b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "6154353ecb6303d1093f26070e9a4e86dceb9f287c28a58a7184ce4fd854d21b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "e18965b59c296da1d219c8577714f11104c4c8c36abca8bfc4f0b51574284619"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "daed983d36d8760aee71c1b658479dc54400eb253c81a4ee0c4daeede48ec446"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "fab9bc7018f46b3b3a973a32b986fb0e24e09dc4247108a483179529700587de"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "4b36ac8e8b4cac846b1167c20d03ef7ba609a779ef0468b7812c606f7ae6a806"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "96ad692adaec50937b0a543133f700df8b7104e9748772a274e2e6da5132555a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "8f34cad2b1ea2c5e04639b8592803973d2f6490d7c62ed7a8f156df51d4cc0da"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 207288, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "10cbac8a5313292b6d28c65fe5dfb95f7dab67bcad1544c41497b319ebf5166c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 207432, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "07850a93cb527f22151b3147fae42829983a295b41789e98a96f3f109acfd710"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "d0735dd77e951db17f075744e4c1e89fc01116eae0a759064d1d135392cf6921"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 194488, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "e81c0b682cd830bfc7b691a942ed55316242d05ad8657e03a7e69cbd249b99a3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 194632, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "b96a7a4b058880dcac038dbfa53993b291dd7ac68272fc3b018f2599e321c65c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "afa737cc76c525cea3ea10cf0f4fdc85dffb5561529f5c507a43baeb4845b67f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "532ba8e216ed063a965bbd9b2923e8546fe3c6f135ef47225b8d90869aeb3e5e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "807cc42eb02dcbb9ba36b24d7944b122eb2d1a5085cfc040e714d36fae9824a3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "e64b23482897715b4d4abd451971dd02c8b6a9485ee94d96106e003a8cd91654"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "b593a2461c9efafbdb14d9022299af56714623b3bd26105fd4fc50df8df72158"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "061db1fb747ce3a4894faf2d68268eb860ba8af75fb616c9c87977842d784c0a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 174256, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "1c5502a3a578f752bd17d5b2cb177bff8cb71988fd5832406362da6f7950ae9b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "14145e9cdaca8a7816c3856c29bf990094e8197514b7587cae844c2c4551f405"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 174400, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "a169f04bef3ab027dc9a6d5c7cdea7d5c4cbed6aa4ca7ebaac8d2776e9049891"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "58f90c8d8df0aa0cffe55dfaff2d19d93dde27cbc0e3b8eaae05c9922465aeab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "ec25b87c44e949a5168e18c651cbae2f342f01d5bccf8d0b416f9381a5e7b7c9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "9c228e2a3b546fa815e618bf6ce2060996908c9a3247d9338421467a92e93421"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 161456, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "5336cbe85d97d02e2be32f00a3f4911484f4892e3399ecc21d197aa29f29d9d5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "a90f57a77dfb13b42431daf2ef7ca22631df338e6de6d86f2329ed75986127f0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 161600, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "2787645b1af5283531f060b81cc6fa6ae6d220261491c515be376732e813039e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 212392, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "e322e09675d0f01381838ca612840e3d10eca9a6ee2a978a7e4070d78384e9b8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 199720, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "657ca8bbe5930421d756f2a2b8d554e7e5c560224f2db803f893660e9251118f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "0e1422ba79d9192351c9de4c76b843eec0ca3e4b5da3fbf29fca0b2b52d04fda"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "7ee2b587c7f5cdb51cc430c7699c0d369567dede31df420039504b64c5b44c1e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 183552, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "4de57362790a9ae0150f3bf8a5349545ffd4e07ebf90c4304f48e9663f6e3f72"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "5137fe9abe67e183fe3fe181c0c902f523829d2c7c2ec2a131df02962e588a05"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 168832, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "a9994ad117e7a297ad768f804399fd59571a23b40dffc0311b059cddb6b1eaa2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "c81a14718a7ea386efe93d6529492c22b1889a2a01caeec67e539087747acb7a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "c419a65ac7a9eba5b4246f431a16d882cef06d57c66088c00c150d781a7b5ee2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "cc9a541407e569834bc510254c013a1ace8346b94f51a78b81d50537aba6b06a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "a02dfbedd78bfae4440e162274cd5ab0472319098f7d0cadfbecc5de2326e693"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "cd74a07a2ec5f87c37de35f3d01c8ffb88ae23f613fc40c5c0f58e1425679271"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "e926a587f7fc80afe93dfe1c94a4cfa41bb0cd6d5b2c0e96ce10e2606bb55f69"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "0f8f43b718ffd638bbe811d9c1e93aec2c95d276b61b6b38fe53227c54d6cfac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "5a58aaa8a5ca6249df74c84f4a3c7e1274a6f513c5d7d2e5c9eff7b6e19aa571"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "f09b48d5657cb1bddc239325304e0c5f113f3fbe5636f5e0d48a59fe861ab6f2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "e3904214e77dcfdee7c7f035754f9c5a6bed81b3f1da0c2f8c396d865a903158"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "d38508518532895abbaa209bca6a8bf94297385f462d4563b5963eb031c38b4e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "adc116f2b54ee19880299ce326a6c758259f80e2adf07cabe2baa2a576efce18"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "1f374b7dc2743e84df480d4471edddf040108978c98eb03e5ff21bece066c67c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "639d5389384fdda991c0225d115bd65dca70aafe849936f253a57f7f12654871"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "64ac6bcbe55749bc00435e45808bd1071226fefbdcec45a6e9cb072d35d1e509"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "1434890243e82755647983afe11092ec3fa120847f2734bcfc614bdfa042255b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "cbe3080f69db4ee057713cf09f0c45dff85b38a0d8985716f9271c34529aec05"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "273c27fd9eaec2edc49e20164bff150fec49961fcdfc817f0695d71f41a4b570"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "4dd703510836350b4cce74e4663f4618354cdf13d2b69a1349d5698a8648dcba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "abfdbc6d0cc8d64483d7de1e07ce5213c43e8f0dc3c015c8c43de8ab0c02bcb9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "a51fbbc098ebf183e19dbcc0f6a82980f85977e9b2e59bcb6421a5d45e8d5deb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "808256a07ac254eb89147acee7adf23efce852b3b03f28288539e76b42d0f539"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "ad15980bb2fc87b05261371e5f27b72d0bc4b8b28fcf41f1faa0410b51da6b97"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "78645572d3efd35d5bf28895a8729d827c2157e2800236880acb45d063c37608"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "e56eaaa8f3918a300a978f3efe91f5e83c3b061f3b43f3706c4e5e7b86169f66"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "f5dfb6384faf7d5350b9f9cc877dc7f58939d42ea9849ecf8a317a251109ff0a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "9d8e33a95902a02cb9da42f668ff2bb3dc39786eaa98969a7854796d30df2ac8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "28b2b186bc18465d1ee11018ee7fe570e2c2608cba6cc5c12c35cbfc8c8de59e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "94ed36f9c2a5266a2cbd5b44ba59890a30384e6aa6f8d33f6f7af2743d17ed8a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "0008f60ab21188a52d56070844087b320d0f37e46a1913553c60833de111a22c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "b2c62fd7d6dd190ac2c7f2ee09a2a6a41a6e7ce2f1e78abcd005d25598a202ab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "1571301219c529e8a53ba7b710dc67b6ad52e6a1914f0bd50c462e71d4e57613"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "f354858c5c7d65275fe9682e845a72eed19ca1a096fb700492b6fb32eb2d51c3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "cb8ad3f4e9649b817da4e1c8d00c74dcc3e6f3a1d01e6ef80fdcb4995d7d915c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "98dd3f3b660d99f2a0b87f3b71c58cf0622a9a3213ec3bdb147eacacf55d8cba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "481a4368cedc4bb9b597d39d2c5546dd1f74da9c6c521efa590ed45c549d6a78"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "0d9b7f6bbafe8460a13e3becf4cb8f8190fb34e2a4ec094f4be8b90d13a3d835"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "0cb80b7f61cdc8e3458c4cfdeb844936e8035de513fc75efb2681891af2c28a8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "98308725737ce35511a2e647a095ad332255f0fdf1c7775388694857f2103754"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "b5aa718cb522808722b61cea92aeee95a23a6b57f55fb577227bfc9c143243ca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "13da841ed34b16a776eb07fe005204a5f97a9cbafcc7fb4edbb54b01307752a2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "b0ff8b01162869d35f0b9483e0f4270705637dfa9daee13b853e5ef689895cab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "5b73f7eb51e019ba223b1f8093924ad16148f07fe6cc365a2ee0aee1a36fc8a3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "216a0f437e5bfe8bfa6dd78417baa1f7b0a31dd319307f6bb865746b0c5f38a6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "a2b016fb74ff1f3434eea4a1a63c8007700caee81f6c46dcf8dd012e38513341"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "d693322ccd2410effbe9edf748075530dce2fcd6cbca76e3e6cce396c8ca2ef6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "56a91ed5ea09fb3bd22ef9f59ffe82d6f265d73c890f849962e636360f55f8a2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "3a5748399811f97d0a05c88ba0190e5e657bd9f88c114969909a9b51be641c2d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "297ed6277364bb7bc669e897deefd268af966a4250833012ff3f0412fef307de"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "47fb9fc376fedc5b4fb8245710dc15820865495236163744403199f6bee18757"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "9130fcc0c1d0f6beaa984bcb93b35b9bcc55573a1dbf731c0d0caedcd067e300"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "4eb227b4462ea3dc11e5c6bfe106f8b948a4067a03e42f4b23957d74ac9bb1e8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "a7ac9f929275a4f85e59baf94605a2b025a2a8388a21859f2acb9f6facd3afdd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "33f200aa6d8ef8fda00bc20afc56d880c59635f6203f8d61541b993f4c9f3d07"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "3c43737d9272593e8df7fdfe3aced61b96bc277b97a0444828963aaaffefb169"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "c6d65be993a4db9a8d218cfb7b629c347a7cc456262eb9b5e6ad96503b896941"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "ea1d7829306a80a2a6e7bab946cd5cf27461916c901e1ed25fd1c3f8fae39d97"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "7403e2485f97631844b5b7ad9f0dc6f956e710e7670fe9e5f921cceaf35352dd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "5f0b74c0ca5bc989086a3120749983b7489bb56864fed95c55d24b6a92ab7af4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "a94226d2b6756c4ccf0cd4f4566a16b9232124e43b329c43d71b3d4de57de158"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "7baea4ca072669251180e86b0f4209a67e1496de7885c32996b1534d4dfd75dc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "31439366bc1bbf17abf7879bbcc907f5bfcaca4130dcd35ecc7da398601e776c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "5aa6f6fe1daa67872c44b193fd7eaa9f5b49b457c1eeec258b18bbdc0ec37d84"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "36de411aacfd944006271067c249450e27f10fd4d056e9f9e9c368c493b9fbad"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "e848bad2dbb0b714f53622d587ad4bd42458a192117e56bcd3bc6c48cba9a771"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "06e301e00e9b3df88177cddaf70f1e11ea3021817e3b7c62cf3da07211de4776"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "055e2154a3f2c03b93c2ef8404089367fc1106fc515b91c6d44615c95ce4f555"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "6ffdae6b22d2a3d00d57490199fd5c4d3900f7b18ed0c88beec875e5caf5a20f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "00a9bffa48d180b120a911b34740a07d51e64da04915346d7d55400b935da323"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "c53c07eae654b6850373745b71332c334f7f818add9e913cbd7bb681bf9781a4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "d584623f1b29a19d07b45dc2a1b542eae468be3c5c5e3745d110eb9bf972d623"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "dc7e9a11696957acd8f3378ccbcd150075418e1cad2296252748e078e3d5b228"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "3a0d697e311e88a8878d905643f9c741be61b3a7effd21c4a0dbabe7c42cf561"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "486ae91ccbe1cf9db32cb7069e00607bdbfac11271ee31f5840c9b757da0e247"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "3d3d667825e6971e1448e59bcf07b78374059e39c64a0bf1507560af66c23c7c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "b7cee301c632a3f34c2cde7ffa303b3e4f509650a071e1ecb22550cb7c92cf0e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "276ee3c444f1916fc8918b5d1d446de80ff918f6b996d26ec419bcb925e025ca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "427ed8a392f2f1c7ae63724857010ce67018a3cdbda9d61ad13e5376e0df5863"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "31d04a3141b272961e6faff17f1921a76769cb1d6c7a27be305cd9564c23fbe1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "5a62ea6696905b90178b94ee1db176cca5931f9538677955f60eeaaf3bac2348"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "a6bc515eca4ea2d37c759d5aa06b4d0db070042b8cfc987bd1c2898a50acc7d8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "49d748fc8b6c58dddd81a0284ebe364b470a859718b2db8eea1d0f22a4163ffe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "d9b32e6c4541eb4d13fd3b7c3cb7a3171828b2d4158b943db11f05c68570dea2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "70f341fd158ae165e5835f1af9f1e1ff580eaf831fafd3394fcbaed010d88d0c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "19a185f2a6d7694bfb72a88cccf71daa7d78454fb9b9a56fc76f3b2c0256cc5c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "3a7796131e88542e7aecbf67b88a2e78ff68c47bc00819fbbe6d7ed4a3fcfee7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "8f827fe12cfe1ed11a2bb32e1de8c7f4a1e369f392a06be699d2d81378e030a2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "bd464b02f759bee78ff80744396bb16153a65c1350f4e95b74208c7e67430f27"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "89d47240d0622322a89ed9f69626b53baa24f4cbcb3f827d4d136c8ad06fecf4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "58d29a9917e46043a065e58fed91f8d4412a14fc1f1dc593920f72d1b8d76263"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "c324f38e6a57177362b9a616b711484e3ce1638f718c00e2bdba3326467862f5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "f204a0951ffac01b301a51c9032c95ac60606244f497341d133bb94f1c30bf2f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "38e479ebb768acdf70f8c5809eae646360299fe4133c526e0fc7253c8be42246"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "cb5235dd312b80996e06567dd07222bc6ab718720920e0cf450c09bcb8a69760"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "36fc561c2967ffc0fdaa678b2fa8a2f3d9318c8fad3795bc6d1240ffccb89ec0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "2d0f0aee7eeea2d0f603bb2eea749d9c834c88471be2b86ab2acafd813c0d5b5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "ac139994fd351b01385d29714e15fb36e9a5626a16ca29b6e2e2788f7c6435dd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "50da162532974f997acbf344cc82242486839f10dec23053d871bfe630b4d2ba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "3c2181448a989f6dba267b039a04737867df9a227c5a51b938c39747eeff7f5b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "044ea3d4cc8359727fca75fbcc095850e83cd0ef8a0b6efcb6013534999edd7f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "3484e8d7ed2dd8ce230ce84d22b77ad051c8e8d54a88068235f934b90e772f19"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "43e31047acc70ae319d53e49756361eb9ae8917a06de70ed7ea4382f8948b64a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "15eaa70d7fe83192eba7f3e8b1e2e982f285b51d8238c5315a566373c21af4e9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "8c10b07e48c919d085229b9fea043268f2f56f3f2c3b7c82a167b576a148b272"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "6c69d282036b9cd642284cfb1fb8467adb814eac4f529bc70094c853d6dd636b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "864f0dcfa1ab118c6efeccc43e6ffa04cc91ed7eb1c836d8ca969fef283487f3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "43619b41a44995c00a7e617f5141919f3b6ecb4fbb200371a5453e9705809dda"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "8851dda68655f6949917da0df73447654fca60cc3374d97a7e6f3deccda26e7d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "8f8a914f3b3895bc49b2f9668478dc28b48ac8fecdfd39c9642721ea9e15d21f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "f67db175b2580fc323f6e5939ad016ae3087ad1ed6a979a2cf42e35e5e5715c7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "91da7ee3582e914bf144bd9af61e2050a3acde67d4e85639161941a5edcdc83a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "7d0138a22a2224e8d3d665fbd74e823281e7d9c52d2d8ae772124a0774ea939c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "5bb7440ccd83436439a9607098400ed6a0b48947f50773bc1e0e0e8c692edc59"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "3893a62f7b248ec5a5b55f5a7d7516fcaf543fda09f5ef1f3bc5f43ac1cdce91"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "92018a93db214f0d75452e84ff068976e3fef292f6fbf12dea151caf8889c05c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "7d43736960af4b4d9010e5ec36ddf04bb4e7b8e8cced793b970bb9c526fe3080"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "6346837fed56e89e83ff982c8a8c354b71be6117a7368acd652c3a519951422f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "9f91e318b1fbd92d7f5a437f344291b0859c662c3b49865f2a17d148ed2700db"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "1a643d142d40c6bff29601247f4ae61b564a8373710cba989a60569a7b5fd483"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "c364e1d1f6810ad9d423f2ef0bccc3ce295c2f090e49def55bc5d6a5b376956c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "b5237db5502987307ba2b1b75348268c18c9133040e196e893a6cbb81e89913b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "a38d58a79b2f3a3b5e3d07069da0de2df0701ed888ee64e56fcb0ecc55c49cb9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "711786664530d483a7d9251b3504297f253a8beb03e4cb25cd3261967a6bff92"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "e107391b6c5cc9eb116024f91d2ffeec6d4a521630ad19cd12b333a79406fe5f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "c2446268836311d4a8b531cce3b5383272650fd828ccaf6a74fd5b06f55fe294"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "893c97870ab93a98001989645632b4c49f57f4236611966d07de350c1f54870c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "3577e44abf77771b7a4254ddeb5510ce11925548680b4950bc9c457a88175062"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "d69f563b1073674c4681ffea2583b1b0a8dd0a7df065f3eb4d83a2bba85619bd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "c362e7f058a24cf9fad0f4665916b7b5ba7a4b08caa88e24366822724d21eb21"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "6696dece06db2180509b71c1bcfa45fb17edd7684d295deb059baca8702e0367"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "e188e9755ad8de2ea5b831a668c8c916f23075362c3a0be57498e1e1c19bea88"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "03e294fc28f79550589bf6eeb61511f064c62d9a40eab72ed20a0d949346c7f6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "8f7d26fa4e8bef02b36b351021e4b1c17c44485e97043719fe4a13e3a1f94245"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "4d1c91f1e52186d40bc3e834911783383d2f78a90699c576e7acd42b0ff4e22c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "4f5240b5ed5f9e7bba3162b112eba08d464d2a335e3db7feb835fe2c8a1f72d6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "cd7635527ba22b061b995fb8a519b54c1847b3a81f7a3b996697fe5fed9dada2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "f30727b85c87533869d825905ef0cf9073887bbb6266915900f0f07e425455c5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "bc75ee6cab5cf9d24f94000904fb7b0da5e413d81d4dacccf4fd09982eb20084"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "71d78f039a76322556a760c52c82965c678e571ccdbb79db138d8c76c0f99138"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "61376f08bbb439c7ba2f38b6e62e36d3701d97c9519fd7618065d5a6f317b105"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "ee36224435e5ea570b65017db7d4b888d125b26cabbc1c8431e3d37296da2a85"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "3ab1b163c356c0db9312c03d7eda8c2baebc2d6954a6b9a7ba9cdc0078b59745"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "72717c76593f38b70d1bb7dc70e0ddd6ce1ce4a684275d800c8da463ea865764"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "2c34a24c8dbda518bf87d0c86484977a489307010e4b1ca533a70a23fc173532"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "7fcbfc6c818b02ad15a17350c9bb350e1887f72aa3e7bbce6f21bfee1596e672"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "553531d5dc320960d06490b494e43177006b98d91085a5adcc404413f00e59e8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "1162de3aad1a5ce26d251061d2b70d43a93882519345166eee57c090d13565d9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "86b8e96d2dcf91734aefb39afed117e9071c235d8abd66bf368c96a102e53c73"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "ddb690c97e162bf91bbdb4c1ff6b3d49fd16a79e94540e27d89bce75a393efd8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "0bdf9583c6dad9a790f353dbea0e8e95f2218f05327a145e9ff6b89ac1346393"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "8258d574075760a64e3fca124e8e11daa83897d792ec2b9eec437972655e0640"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "609ad0d36aaaaeee4ec799dc3749b843f3e1fa4be4dfd0696b6f3352a7037521"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "731b5b3ddfacde776a3c2bb5cc17339d3e354d9056f46cc7b64d76c1f476c22a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "aeff9e3fd0d5320f54b1aa386920ac327d738ccdba56cd699a631ff3034f05fe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "7d970dec94b1dfac9b1440188d693d16d4ddc12be991c4ada514ed593d503663"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "8c5cf3c266d9bdc3d76bba1a91c392a908b8cbc81b51c7dd639db66184cca747"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "73a22dfc36f171617a30c4ee22c8c18ba420fcc604a17483c4d872d475fddd8c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "a6338a6556953a01bc9a0f54c26fbc292a65e457a6b7dfe053b59afee9119920"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "c6c912c41db39532bdcd61827237e01bba7ee44e3282bd4d174f97b46c811783"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "c862e8b19e83a12c849556d97dd4a3955d674b869c208002a8f10c2c6e01fc74"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "8c01a3900e4b4dd422a694cc29757e6d34f3563d7a2ccf17b2f571295957fc54"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "ab64be9074bc9a9f2fc94ca8532c4203067d2920537b42879ab1254751104329"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "4fb53b77a6794f46c3b338696e07579ec113aac31f3f48e07ec87b06bdda0465"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "aa24208f3779e07ec2ec7093bc783081a8703c587ccc7a5a831ed5e4d9c74a61"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "64ab4dd19d280e041848d6a3f6a60f735ff52dac7c0fabee0c8c838253424393"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "ad48602affbc9d38605606127ee5dba0af60950dcb60bfb70af06d47d7fa7721"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "a0c925dd60250f006f18de14168ff6211e00239dcd8a05168926dc1d27106007"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "28fbe5bbb1ef3f535c40e0242b8924bd700a0e35549cb8d56e426439b508eed4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "65e98140cc2d2d346193dee5276dbcab44cc4a6a6cec8e1ceb8208e4952adde3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "c6eaa7cc4cb298af7c7416198aae499cf6613217d795e969618d3b67971d0d0a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "e56f272f893778679f404c7c8264bc5b7c4d8a06c5a96328a8a55dacc5104f8d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "40dadea5cdf135c14191ece4109f4da8b0cfd3615d617e1fe980908816dd5b07"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "0a0459af91df4c151c575a0442e6f674c12279de12866742cb681d902632a23d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "c2a2b8d129e0f262d37048946a086562a4b67625abb0458384de048dab9ad5dc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "80fa31d5716823bbf203758d1408ebc81920f02df9a9135a06a1cb061e6af9b8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "5d047b20f97f0b85f8b02cd799d8ce1c2102856bdb7ccb8f75b50422f4e13c04"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "ea5a777e936ca0c7299069d2fe05349f97602d96d24c984c1d528ce13c693c67"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "e7b809dfe1876f6276a27220387c5f2b74d710de382b46d0f3fc717b9bba58ad"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "fc5eecb1f324480102190bcd20b32d4166de523c8c0e87cbe110dadf40e0d1cb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "10191b1b049505ec88bee41f173926224556f06487f2fa8890dc918080cebf39"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "5eb66eb04f39b5cc53e159c10633e7345f224dff05bd64f486a77b6377e471d4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "b96652b89defd6b55c142b3548f12260c2368a408a906e67cedc626f6b82aa3e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "e92f0744b031c3eba6d2d7027cdde6576efb987c282dfa8a61b5d91ad312109b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "33f5b1e709fdb9f7ab790512915bb844f3c9bbed1a99018975e7f8a672e82493"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "c0d807a88407d74293e5e558d8d3d6cc23b40fc05c56e41bea3cdc9292e86e5b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "2eb123ba646ffe37a19e8d245837036c6f43d7cff86a584af055051bc2ece59e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "b426089051c22a76a18b3c10db0c621ae6b9abc0703772bad5c73b4400a76480"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "226a2ce1519273c757db4790ea78ca404bbe9353af0b40d1fb79d9310f92d122"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "a7a8bb123409bde8f94a29c2c83b5c50f209488a8cc044c644cc107e3a1e5a7a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "e8394b9b82175fac6b9f75a126fd28102fc104c45ede114e92b77879eeb75aaf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "af5f3bd3fb845004ef4c9c1b62bf0b611e1a1630cc9c90e85f1a4f9490780179"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "6c8c59a1ac053436a6cdb0e801fdedddd10b1826f70829cb4eee8c1b768326db"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "870255693f4502d84066d05ee638d838fbb8f9158cc735ca50eb95ffd355711d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "0c1b4dd9e3789a4379f1b55766fae97fe6701286c5d9758cb557f8a77271a87c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "8851ce3c300018696b9a10eb908f913b1626e9a9d6d2ed1d7a5087e43e0e0c26"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "feabb830d10a0ead8893acd9cb82159175af689999a43285ff68ea5ffd001a68"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "5d6f7f1bf678a20a8b4160f537a7a3f575390dd4a16227d78ba0966497ba954d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "93a167f85f689bb83885c75d7bc0b9559165ff51640b38ace6045c53a693f6e3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "254374a4315a66fce71e42cf1be1c9ad7a90f7fd94a4095762f13b6440c4e90d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "8a386b2db49df003b48c4344463d065b53ec7ffe9c0ad1e530712c567fe568c1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "f6bd470dcbe6e3c809377570f8e9db87255165338f419054593635dcb53c7132"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "caaddae93d712cffebe9104495ee2c4f8204ccc5d4d7ddbc5a41954a4d731020"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "f2b0f6d3d05bb6ef5402c42d828bcf1b13a28e77d49c669979dfbffc3ea8fa0e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "b7579b6a4a3a216e53e80ddf7437bc17618a762867cebfa1a528b8618d1782b4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "31d9b6e8ac47af0bf9ab7099d9d0077dbb52244bf64734b4470d37c26bba2d14"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "0bb3d2a6ab54a72ca3afb23f2acf0209ba9cd59fcbe6b7606585f8f8b90ec8db"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "13cb1fd1adc41e1ee2ef1744ab41140a61d352d04744959c1da61ef17cc980a0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "3cb4ede5840970af76e81512c86353593ac6f6f3982ef19c45278409d2b991c6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "50f6e7f66b2e788f6adcd21e627fc2e4b9f26c6c3206497a7a370dd56a95f74c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "69ab30ca044b1c4b31a982e0233e1f156af5470ef107518adabf3370a2c582d2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "7431c5da45e7910fba5aedcee67c16d96776ca589081399b69a4701636c78453"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "814139572680ba0435fc82b31b6ecc43243d7d61a851368363cd12e5a1f2aabb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "41d65b4ca7d4028a67471a21c414567ffadc3544f760e94c135530fc565d4a8f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "6f55b98ee5c0fd55321159d9d7d56433ce1b144da388b1a41956e94c9da0926a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "7fb7b8a9979f635c817b0566651a37bb617b106f80afb5b3f48465c3216ec56c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "6e18cf5c5a071b0e75f04e33c0127b5abf55868d101c12669c632f963cf83239"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "fe8fecc9b023b6d8d2fd93359c49753332bec93500d887cb97ea8d4265ebb939"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "e62abc3876801d07fbfb456fba026c48921fcc257f8008cb11072e5148c62f4c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "9e72786ef2e35607c0f7dceb08c4cc87668d9c6708aa197fb7aa9642ca6d355b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "1dcd7dc08b0d5cfbf386068d710cf23f160b52c69bfaddb7d2e292f29c088f08"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "14c7c845c30c3298649e81309839d185a29cbbe490998fd973b24cc47df823ba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "bb645dbd9ec6cb9237e02945550110d7542556a1024270de892f70fafb8c5c1c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "bd51aade519e595a93c34a9a8cc6ea8c937e01e0a59ee81a83153ea26f335721"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "cb821167bd6b82290b8a9d44db2789717543fc8502c0a9655480aa3f9207be5d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "51b1cb476948d8e6e25ba52c73ff6146d33f1c5f98033f2692636a462f1b483d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "668650b30a84c1b736f2c3dd4f8a9417a8220ef4a69d95aaf67341d3ef7bb2d1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "076801cd5cd7824e6578ab616a32e704e052737c4a0b016ce6de343d5778487c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "bc8d24ebce58913ccc22f93ad73f5d4976bfa7a5f603c1d6fc4317fff198a414"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "f354223c90ee57a6e59a54c96cf157fb2567efaa1000e9da998560f804177c64"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "11daf4d8c25f0878626e20ad7604de73d0f544ac63061e6af08c95fa0759e46c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "d8e3550dacc5621af5be1dd3b0e127a3bf4c18b30ec8fdbfa957cd589056ca48"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "eb8193edc91aac30c880636e257998d28650bc4f7ddd69df6984d9452bd1039b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "e6e66ce41f8b7ec571cd569bdcd79d16161aeaa70d514642abaa450b6743d2e7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "e7b2a64b180f2e15d5a3b4c6962f700f68277eb8d7f8ea8695be744c960880f2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "6c575c64590c82aee85a46897f7fc92d9f1b1310217a49dfc969e8510b67e3dc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "cda23102983cd5bc1e146ffc6809811a675a6abe549a9102e4d17ddbed1f3466"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "a7bbc39c66dc08994ffb19b5d322b60bdcfb6771b51dd4fc72d91c66e7cd635f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "d3925c658286d22957aa003f6750885e7e83b11bcf7a18430b7f590dfd922d1a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "ed751896ed6cb5e5527b0267101d45943140d7cc1c5737ff7962b940a01d912b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "3f30c74f1c7b3d51b19e219284050212146f7a6c02b5b71d995cda7d0642f415"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "f88d4b9c90b983c282cb054306da6fa4ede6e63f620d0e724b686442cbe39512"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "5566a83c1ecf63d22266bfb54b59622cc80d9848c02807bf2d02fbd0fe442c43"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "2b1a6ba618d464e38b1c52def0bf2201106d9e88c37b11b777b80feace9c0534"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "07668b1447bb5789843ee31182f3bbcee62fa7ce768cdd2a7921f5f4adcf8cfe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "a1426e9515d35cc693d3671722719946ac39fb3f51f7254ccef9a95b5aead90e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "ebfbc3ae082990562626da3280157e5856b63878c6d03cf7571741325e0d9234"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "57a2556234b81d81e67433aa2352804bb493ad7357f062a1acd2f7e4f4443526"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "6c5274dea58e9cdad30689a45aa74c4b9e4bdc049d19783505429b983083d736"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "15c8950aa52a841efe5995dd149796766f7ae37862b07d24bb4e9c8f493c4325"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "a890fb46301b689d48dce67fd369798a3fce37b1e8296697bb2a1b44e6ba3b2e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "8df7756af7794f354351647be0fea04d019f94d2908077ea23deb533fc38b30a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "b7f6991a3c0c65acea1c81f80719cd13f8b0c3b74d82a21ab079015806da4c90"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "79e94876f6cfbf524b6cfd30e4ff9683ed31698eb9775ef4015be1b2f45305ae"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "c9cb468c5bb2f8ab4fc2877bd78840f055efa7fc61df136267b53ad6c0c3282c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "43204b7d9cb660f34de045dcd263ef3c4755aedff38763efa41f2175a437ae63"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "a2f3632addf0748268387497af6916562ae43ac4ede937d564bd166461807168"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "e98ee325623269fee98163b2ac7ee2d043ac4e6da11bb2019df84c820f696b4c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "5c018f2dceb88c63d323accc461d8631647188d19694b1c87b61b483edbd2a11"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "fe557a67e7dfd9c9500d45b0c03c0e7cffef1e68abfcbc9d0e4200b3b1e787df"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "6232bcf8c0b5dee53c3d2b16062e7a4d5502a6f357b52e2a4f812b4f9176477e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "018be9999ad55d64339679af717882c92e1bc13fea5e75a6aa94aaa9175b4c8c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "26e859f36415799e517ae7bbb16349b017899ca26f21147f16f66f3e5944bcad"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "06d16b5a26cbfd06516f6be718f640b46a2bbc1e6704a7a487384db943cd82a7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "8297bf4d2389f747292192b30038e960f0c6d5380e7377b1c5d1bdf8d4f5a4e0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "00147441ab89e520736ef5a9c1d4c600bd97cdde3cf44fe9fa15f25ee6c3759c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "afe218b7282bfd7b046db4c55e5fbad910477d73aacafea516b0288db6ffd1f1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "d8913541540456c742c322560d311c9a8a473ca1d28e7c27b0395b75ace49d82"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "053ac5e1436fa6edfe38d32bd5b13bffc1482a42033770425428a9f980ed4358"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "28483574a3184717ad7b74c998b4238bc9ab4ecbc11031a953aaa04322535666"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "018e879a7a8b62482b88161577110ba4f949595f25ca28c2cbfe05fd095ee591"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "ef4644e19925521d2be4b323b1101b21c9b3a5c7f229721d4040e07fe82f1c3a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "20811da5e5adacffa2775102a6f73b9c4a2ef984dc7390ecf9c2d14f7d18dcca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228424, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "3e2ec12fed9994f7eac3a07bb79b22adf2c4e71f8f135b3fed135aa1940550e3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "300a235b5c9dcdd486b41e02d5c52238d8b8da7ba3ab5fc1cf3e707f6e92f28c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "bef5f6336ef71ca5edf0e98b97f83ed1e9d96b49d4e4595d6bc0cf261a8f44a0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "1a6f313f135ae96ba8fe60f73f77d6ffb16ed1bd077bc25a17d578ef15bed1cc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "ad2f82c0c59c8fbed2b787560e326e65dc85d8573975a9c75db5ac9e588ab2b5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "378f4b8abe8852b515f9e5454e3b191681524e403ea6208e085432dd9492101f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "ffc635aef848ab59540aea2c0eb711b97ce8953402b0aad6c96e69722983e5ab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "163040339cf844f0cf2962e852596a221847d66b9857b81bc0a386b0909b478b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228424, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "93930c620c6448db36e886be2021e6655abfa68a6f5dc0d0088c5d231d7cf244"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "b5f0bc49ab71a9c6f0bc137cc65f515abcd58ebdc2690b458147618b66964e43"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "0acbba602ab4ff2c9eaf4928547b52b0a0db31e2487f2c2b0ef490747cb399a1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "b9f9487f175dcbafb5e3db7b4ec836ed2f59989ec6dbc582c032226bf7efefc4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "567f32fdc008154a94a6829ad7a7c9e8f3e18397ff7c6f9c101f0d90c9457023"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "37725e99c51e2a396168ce82bf853af0232ac096de62fc4a06bef4d7623e320d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "4c28e0ec2877c1b8070165541f0e3aa063c6f2b6819b53ea4c6eaa0926f3db31"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "534fba046defbf545c73ffe5d55e906523596d447f6bd53a83d982a0e34a41d0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "07d159332ac561f9b7bc9c4bb85c073271c8469bc938f3cb9e74eb6caccbea81"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "5fee0b156933c7aa34c7bba7b901c464743f572dddff6ccfbe8845334fb2b55b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "1b171f1377e4579ed8fc208c154f86e85aa061ce22ee8030f06a4ebe287c8d8b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "43eeea2ec678bcc764886b21dad64d7f5ed457c63ed619bd3c535daa119e35f7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "6730a3e3b40cbe3f78598340e53682eb0e74d89f136d1edac4570a7b6cf51f96"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "c11f785c749df3df560330f13a914c47dc6f11f503ad97c28d8c3eb6de640faf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "b8f2b5b8afd5c738230163330314d87e520cd19dab4cab6f47af7706d088944e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "6d2ba29a818db621ef68deb9fd524aec20cfc0eb59afba49554ebe0ceeb166ed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "40f03edbacb2a2e5abf73aa2c41b65352999cd0953d99c476f2651394ead24de"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 160016, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "d15cd0e216542521fbeb7c1adff9468bd1919d140aafe96feaee1576faed62ce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "e7ff76396141ac163e39ae45751d65ff6563d94659bde437c2445de53c0360b9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 154384, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "a91e69543d2e1ef949ff32ae21ee44225104b5364cbdafe3868c9b47908e0472"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "552627570730a70b6dfa4b178da52158a127f6c90c76ec99432c283e81582df7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "06f56281184cf2b2911dec7dd870d57057da0e53de7f8d1c97d097c630f3c8a5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "71fcaf753ebc06f6d5a03e39787061b7cfa0b7243761ea7e75a98bdeadf34d8c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 161136, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "8528ffe21907f1e3d4211324a208bee70c5e3b66421d839f528f12099715bfd2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "d6643284f4f5b2a36a7cdf0ae430fb1424180ae510a950ba1d36e60da1d7fedc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 155376, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "15545a38596156776ae6acebee25611c12c29ca19b7bbee9a225707cb70af7c8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "8abfa080b8d7e12b1b04e78c27e8b0e1c7bfc8f144608f3a702a6e10bf9de322"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "9f6e33ad29f9477e1c4d6a3211bc2f6ab2c381bdee7546931b6739c366e0a84e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "222deb66bc77f6c57cbdacd58ad91a91d0b08abb40bea34b85a73d8828b5fb5f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "e53a02d8c7f72bb86621ac139ba23565a8b687b89a0fd6e37b216e7761b6b12f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "d39f9daec11195fe54a853a766278bbd749ae11e9d55fddabd817e5c4f0178aa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "fba9aa15c680a591711b0ebf299587a40e21e0dc50d8ba8694d34e5a16eb656b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "e7f419e1d74e5644166e07bca1d2310c76a1acef8f54aec9a900029362b57d51"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "4213788c57cef8668fadab439bf3f649c6dbc500ed427a0b4913a53bdaea0c51"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "f4ebbcd25c6c0880d74a4962a59e3aea900cd636c6510dcba616335d178dbbf8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "a84664675a4ea42847ef7d0b1bcd7d3a3c742ab24092aeb40404fda247a90ff1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "00b48e940d3dd1d4e203a85eb8423c60ed4f49b40769fdfb57eff9fcb4478822"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 160016, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "760e6ffc99dbe4eccaaa75ed220e83299650199229e93e2ffd66b2270809e0a2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "0b638011a9dd3037ee856a2fcb872583bfdf206c2b23c4a4ae11fcbb47029e4f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 154384, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "cf374691dbd09860396aabbdb10a1a2fa987a0d8ef06974195230caf10b4dad7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "c8b85113e787f8c25dbe81ad65eb93978f883255faa15dc671f71fe7d32e5dc1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "4d0c26aae0932d26c91478e7b886e99a3cbf3fafd75431a92c639409435aad43"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "babac4b45dda783376f3f0bc7f5e298cb3f96c5a8c02bf2f4e1ee6743f0a33ae"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 161136, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "22d0a2acd2f808d580c2f77d632a8f1a0c90d23e267f7740e1a004a953ec8785"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "29ecf87e6d164c0e4e6d5c3943c22e1b2642028a8610e61330198bc523763f37"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 155376, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "ea928a6f5445c0ee1cb58655d2eb2e9518fc005a3e72f32cf9499425afcf6f91"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "8b89698df497b43b985d39fb9c6c6e6dea00503ca411470479273af5f67aedc7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "99fef1c32dfcb40eed654ca89f56c9243395739d819aa8a2f56cc03b47181470"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "a9f89e3dd9374af5913420e6638a80351d60e8de41d599231fb6a98c4b134f40"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "feb18e56f4b5ca7af0668f26318f16de370165ab60d10b96ee37d4f855aafd70"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "0330e514a9c2a86e59cc4a8bf52dfb944ac0b78a5f30ea473e5483dee33924a4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "785069012d50023717debeaa1ddb4fc1e632eb318b73193a8c509824c8e5f59b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "685c8b2e93e97b8d2ef0622e11d31d912192250838069647840e7b154bd15656"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "8e0c05f780bac4956e21c394499ad1801f662b22a808f349f77587dcbdde9706"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "08107fd61e0ab7db5e6c5bdcb30cd75606ae10c8214f2dc1253c02f2302b79cc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "93826fec138e6aa3e2109aaa955de41efbce3a65537d92033e03d492b57079e0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "db1d7009c1780a9957613d5137e1deedf872f4155861e052f7b4bb1c6a6bcf08"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 160016, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "354929cd3f9e1e7c09238f63cbaf13b9bb3f06fb6381bab9d7d62644a912eac6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "d3876f6e6eca78f059c0c64c7c735c8ed8dce98e00bb74b8a43de1d6c7f9a405"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 154384, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "86cc45c8b158bab2bc8f478026686fba6e0c056b0ad28d94e8e30b773e257373"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "eed61e6851ac7e90f5d0e9e96b5d79be3a474592fc551b46bb4920befc541005"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "fd0fd2186ed13d3b9e2158c8bb09fa55e22bc467a509e147232cf939907da8fe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "f0b3ff1e9f43165c1d3245f0c460d49a12096b4bd57601f8be225fb7fa5dd52f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 161136, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "f17dd14c54fc47b57b183e681c770593dd6800c4cf4f0c835c16f15d86904a21"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "75b9b7d250d6f983fc1bde3f9c4210bba4748092295caf34ee56a2b2c1ef8ffe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 155376, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "6624dd8c128f77f533734addd1ab1d7aa01df8112686a33465353482524c29d7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "ad9d4d2551584640188e14c613e9c1fa18a930477db474a94f1064f91e7755e6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "39a4fcaea5950fd52c0d0b3fe870d43693ba34a9a9234fa79f0d29544523d2cf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "e6b542973ba55b69898ca05aefcb2423da70868d4d45350b6e53fcdd5a3c46ed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "e1d57236e953088a8391640801409df16c8de12cc71c67f069ff0274ca01adfe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "9594d31e8b390ee82e4e978a94139eb095543045027b9671b767a46dc4f50e01"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "f4a786e74bbcd37499c0cdbf5e147da3284d6f99dfb57664de9c34fedd0ba30a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "d5068d1def55aa6afe1dc49e4e0e720a17cb2917d4d0e1ef21db89b1767ddc73"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "88dfbfbd5feae0470f386206440f78b4bf20fd0c29a1e43fee72103f63998dd5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "a7ed74bda3dd0b05b7f79106f9ff30a0631de447005714b56c48ccabafc2e81e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "603e50a9f3f1ee4b4abb95bfe8cd9f574c69e5352d86c0474ba403bc940a12cf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "c7c1663bd0cda17bf7acf9aceabc268476a38ff4a9648bae5db99bbaa1f878ae"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 160016, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "f4d979f683a366265972af6dd45c64debd04bed2a659b5f5bf9afb27f8c6dfc6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "3c785951402d66efbdf16c8c1ea68400f40f8b8dd6b73b7dedd71e1883c9db92"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 154384, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "2660f40cb9130c5859957db03ab492eca2c199139210ba669d15b51dc39daba2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "4f26a501d5cbb49760c374ba631b3db60d01ce4975997bff6ba7e3dec284ff43"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "bf79440ac2d1a8900b801a3f232ea0e21e0a31b8cd78dcbb9d292c29bc33b53a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "fb1c55c0e7370fa0cd6d1d082176e8d5ef50458d2c4f7a6d8204b5366d952ccc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 161136, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "96f53ce79023253307e34fcb8b266deaf51e7844f1a1d1a8835b29efaff5048c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "eefbeb07949f228e6b52c912b4a58b5453b67ed5e91667c190e0f2174abf4f6c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 155376, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "c86ad6607ba227d3979042784c77bd41fd11445d9775bd495ed59857a5649195"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "6f120e667b6a89fbe7f34b592ea295d00f3c8fe0b0d3032a11d3a2c4b6bdb0f2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "9625c6049cf8e59d566e0f3b1043ebd345f63cf7db7b8dc6e7c4598cafc91282"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "879e692e8fd761baefeb9152765cc6c8631db61ffed3212953da0f0dc8525fdd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "e1175eb9af89167af89a6e69e8a28c7386822ba323b62d5767471d09f5d09f9e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "a310d5b2445698fe64d715f92a7690b9e0ec950f5cd5980df2db99c5b8f557cd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "4fef0f984e8a1efa1f09e4aeb0c0d39b6425e3f42074d34b8dadb8bbbc890ac1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "3feb2764cc01abcbefc132c4d8418c8c44e07b0b2018c9f2ace0dc8cef558abf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "e9f88d4f06e5ec6cd02878b63f17388db16d6c29ee1f0f8ac0c6ca509f4ac75c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "31f73e973ed8dc36536b89987d64102eac10bb40506e53a11642d51884741b05"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "e614867fa019dc6030ba89e29547584c8516c4f1af063c99afd503bcd7619301"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "05919db7025dfd1e5ac7ce19c338d54b9323d6023b4ae27536331ba52422f9b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "b62dff3541dfe36fe4f28c17114e5c09af1ccd3322d93e3db2cee89d9df8606e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "dbd36178950ad5b321e4b48e4aee0d0aceca343826176ab508ac5ce4d907d6f2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "1ceab5cb336c4378a09f27e94a00b8f82924382fccaaa88e637f4d7ed343d92e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "89fbc03a8c1a0cc90354b350aa6758aebb8392beb59a9e85b18a781b09b712ec"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "2da486f8d5c58d06bb712dca782f3b8fa8f93d7baa9abe896eca561079adec53"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "aba8180cb899f0e5e47bb08530d17c6c9e280c6e368706887dd9b679f89c9cbd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "826ef1433822cf439868f40476cf3151bbd331fda029fba7fec531bcad697269"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "da8dc5b1e880c00552718b4beb7ad36ea66cfd6b78776546d5044c5eb110df5b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "b38250f150517c4cab713be5928f1292742ad8a39b4294fbbc32eeffc34cdd3c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "e02d72f8c5a16d4cb05161063f0da504146adbbef293725d958eb9631504ba5c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "44ef56f8aacda0b1b3eb046e3c0cf184824fe2fca6239acee63baf9360388dff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "cc0d2840369e6161498c7f70c93b5acbbaa8467eba5dc379b1f667eba3f49e2b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "826a059d65d0c53086a33bb8bbb5468ee50045227dffebd63c86773ecec794fa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "a601c0a7ecda396009928a8b86b6ab3f84c33ca6d88cd7d3fb3b15611c2a4c9e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "723c2bb3567e04525b73ed12bf7b9af9db23665213d374fa5e53d17ce200b108"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "db516b30cde0341d075cc518ff2cb17aca2dc3f27e9a73be128d6790c3278fdf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "efcd4a33717b6f763d4383fc5aecbeeb794216039ef5d0ca7ee27906fc416a6b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "cea23228841990f2b821c84bf4502698a011c4a1550c80765227be2e5b829260"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "6c75c63e4f428f089d09ed705d48e0c045968ac1f8d053388ed51d044283fe89"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "7c47ca0473d85a0a87f665007e66e5f019fca5a254ca1eb2159deb3fc5e1b24e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "e4524f1c9f1157ee33d20f0fb95c0b4ae35d67ad7d50d351ddeb16d49a85321f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "e37ab42f2638132f17814997a0dd332ab0e4396e0ec1bb00c8f9e4016609dec1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "fa4805a8e8b95a12a928afd0ddac1b866c16936e1eb794560ea7dc25746ee7e2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "0619e3009502e40f6f05c0b45fc97a750b17cba4d81e3f9fddaeac857b8745ef"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "15e13b8655013021b5755a8b818c7ca0bc6065f443e6d2e1b3c0fe1c4880f8a1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "c0913175d17d87bb7bd316e6d75c2bf3e48a5b94b5f76a410314fb0cadc5374d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "4d169524ad66006ebbe60bfd8cc765d5c611948732bd8d90ef212e54df752f2a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "1ed965f53031bdb98de1c94b09758de354164397cebea9c3dc60888864f6facc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "fd66e787ce53c537461447f826056c0f04a2986d9d229582481d6753faccc9ec"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "f79b02f2b7ca309086ca814d1ee777fac9d28cd91c45aca38432d64980c773c7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "6e7de6a6ba9effe2a575715ac05e51c42dcd0bb5ef35d0de81e99478fc9c8bd5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "00e099844b71da530ccc9809fb84e63c24fc48db17640976e9f85ca17f897d08"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "54ca5c99a071ad13f519d8b249c148d8a4d8e76a1d8a99754440e03e93c4a48b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "6acda33b59d7908c4ca6ad053df5a685d35b8033641a76b17cc20b41ad519f20"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "3e6ac5eb3979ad26b70a3b4f51a7ce895fbfc9187855a9753461e3980660e54b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "21e33b1ba8422175f4e63e07f61a027ca5b0af33bff8ef30f8424fb87f9f2213"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "ab8f280fc5745346de7eddf12da26e7493a5740b127695cc5727e5327b116bfa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "877344b9238ce0ee061b41f13856df4d7cc00fc6348f932c958812937381fb67"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "1af490c1b5025ee2d143501ebab825e6d09753823c1609a637d70caae1b7852b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "ffaca81442338a01e4f390deb490878440bfee70b07937d3be09b2350266d70a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "adbe9f3b3f0d481b8f4ba130437f2eb1ec5b2f6540c60a5fcf08b3a6f78c3feb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "4e79558f6e5e9db6c9f22e01721f4704dec86f75b21c9342661fb1a34bee94dc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "51bfbc543c86abc066085cd206d4b9a7d42fa33813ee7523accf7875dc7fa36b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "71baa0376f53e5eddbaa4d8248de7aca23a4ddb965f2252043130dd38d95c156"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "05d9635856fb527d440c915b17993e4afc2114922611a8c1243238f8e83480b8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "83c7b48d80321c5fd498fcde432ba33475a426f4f855454af0dfe0d701e2e54b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "27b44af4a9ad992c7a8fea3a9dbdbb87e11c2159552c930b798a2f9c5a4c40cb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "4e50aeb28c3cfa6cf3c61b58cb0dc99934ddcb900612108b6971d62cc96aaff9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "c42bf396ac824e52f9095ffcddb51be8393a901d185923f7e7411f57ea571ad4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "350a6d9b5940e900556c934629634158f68f6f674404964817b86ee6607c7d89"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "a9261f878ea8a4b73575194d8b33b089579d0b3192e86bd63f25dc5910da32e9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "02429b4935416eb1ce568bb6081ee4708993bf11b56a0e3e1395dc65dd09c3db"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "238e8afd31b95fc44c1b4b59660463c87100da79ab69b64dad4407c5c6acd18e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "214f08d80064e0aec280c632d8319f9410ffd69cb2d02a276de0542223fbcddc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "f96e727d120a5ddfde075c764c8f9b4f60aa8be63b880763a84691be7afe50f7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "4d83b5d1d9d45c2b337fe72871063689590e0b2358731ae658a9e02ef9826168"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "7777e50e2e3466a88ce817220f43fc98ae028143702f8bbb0d08ac3742dcd6d5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "0c4e5db86207b8da0787907ccec9c22e07fe3320767a37b50568866bbc1bbd49"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "6b93d5b34866462d0fce7b3c4f67962ca743483b458afafd20545276a3f222d3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "8123c665c7776ced491b517610abb324940ef51ae85d377e033ed8448af0b58b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "f2356020be4cb6ecb0c563cdd28d96b08be489a3765a78bd492a90b7ff90da12"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 191832, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "e1995514696464f5e52834887719dc51be3cbd842c497e7eddb9e2a5200737b8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "0faef584150c16af7637d51b630bfa0f1946da2b238d98ce575ab0f79a03ff75"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 157008, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "7cce4dd0dfde1e3424a9f7bab57d2b9195de54172cf12fa690da700d14ecddf9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "e61eda93d4ff5a224ab829f4809273a34e7d62401ab5fd1b796d275bf016953e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "2c102ad0f2720d25f73458ff5dec55e1c7496f193d17c4c837d21facd5c06d4a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 165296, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "6dd583a67024c6a45b27ebeb84fa255bda5c9548529e08dddcd987c29af84069"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "9f0bcb3537a4bd3971fbfb4d843f68412048f9c02da26ed9be0128a07db35b1c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "073d5c2596ef3a6c22b046574ee76195728deccd297f301bdf708663f31bb185"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 191832, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "dad9baec080b82841a85691c6e73ee3b9b6a5f5007410befe986b3cc96c971b5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "58cec7dc90e45885203126e5770ffafb7886a9b9603efbf4d59636660b74ecd5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 157008, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "16ff01ce4b99d42e27be313d5829344e6a899dedff37a1d24d0819a8e9e812de"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "d9a0fc32efdeb22a8a93c28a56faf1a9e03c3ab3c6725a85ec2c17426ad24c11"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "400e2ae4cdbe13d6f8f12d73f20644f32fc69579a84fa4846d03f9f4c031daf0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 165296, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "7268de6056d71cf6fc56ef60f996f1d635495d936e2f78678d2723f3d135caaa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "8c71857708e7c34f8ddcd01ea157c417951a3292cbec1d953579103dab4db987"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "5fd032abf60c3f95fe8d7b4d8f9e00a7f6b00ddd1b7d17bb0b535371cd9a29bf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "008328b4a6320013fc91f4a905b267e793369f923afd03992338b3f91887ddf7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "553ee5457ac79b6b01105efcd7a6d39e396b7f096531936356335102236c9a97"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "eba6e1a90755b3353ae18272fd6ba70912809d0c2724f5ff8175f4b86d1d9b7b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "c866b0ed6e63b714d537bc87feb766ab9b8110f34fd5615b418baebbedeb6e5c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "3bdecb44ffac5f407d8640925c2f7bf889227954007ffdee82bf7944f2c9074f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "349b07269fccae1c360c696417dd81ebec28f1c916461e6fb7fb8194a7af614c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "ff349f0333ccf1a27eb75c100e5baf91b0961d154c6fe0d4006030de17651eb4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "0add816aab032da1ddf5dc9f07669a4f0c037902dd02a1b02eaf330aaef86470"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "508193768449592ad00ec07e95e931eb0890a6a809832725cdebd0f4bba085e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 157088, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "bee939e6722504b606d3018902b2c9ffaffc863bcf38db7c0a0ce706563869c1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "2289d9d81fc1d319743f82a534e271ab4148df64c35390f3ff7be29c67d824fd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 152992, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "1b586bbe301cde68ee5cc1a3b0afb14d18fe387315f042d45964c3a9ba1672df"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "0e7e845e98890fa5d461ed8998eb015d4bd87564de6913d667b85f83212bda88"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "35090285b166c1c85da5db15f3e561561b05cacf56615f6ecad2c2ed92af0989"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "8859dcc61af327821e3444b884dd4dd3f8e7b86abef8c8043a08fcc008c9bef9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 158208, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "c373f865725ac3f54ca7cfbb144a84bf933428223f7db10c66d19d40cde430d5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "b5367e8548c98ad02d0a6e839abb01cffd02311469ebcdbebe6c76e50a49cca4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 153984, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "a9f27c8b06cb5c5a7fde78d0d3a144b351f2055a519127a2c47c5ab213131f55"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "2c98e4a5612155a84e564daaabed2cb97092da77585032f5e632763b9d452923"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "bf32b84c2ef40649b37ddd6ae7622d415c78044e3c623b0abafa6c6528eecd1b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "13cb72cd7594da8eafeb63156a3d791cbf32da0ebb8c8d130daf61c8e89be394"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "e4a7d456769acaca6886efcdab22fad750ac35e8287d4e52d328c8d37cf21343"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "0b98f725b004d004e3907c6e0a0cd9c11c57934e6055eb7bf31b2802a6d72d48"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "dc915b45e659e3a600eed11314b5fd790ee9120e53bc3064b51e57caf00a5ebb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "f4ae2863017083d43da5c3e826f221b7f2d8ca9fc3b6ad024ecc89e0a46cb30d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "f7a893b0149d5353c0def0bc6031db6e99194baa842ac14fb1a2ce1064cc14d8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "b020e1957a94ab88fea31265f3432bb1c4b5816f15d462cf523968a89c39c917"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "85a36e76943101a58eaa15c7f65ad57110128d26f50b34c13bc534cfd99ca537"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "74bf54a948061e9bd3750f64a6a24544cb81777b243514c205dfc2e407d198e3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 157088, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "dc313f75e9d0c4ff448b28fbd2b60b394f60b91de94d9dfc8cdebaf9eb868c3e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "ebcf8e7e34e831f787c87d5ce9385543ea486dc0cae6763c073017e54be5a48c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 152992, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "68d51d0dbed9009fa9944da2be80f60c27d591281f4ce9869597587e0d56fc5c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "be75311ac58360cbe81b27a32f240b4de264a22a51a2dad599758dc0acd7fff1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "9db91d8dfe6ce2a859c570f266b6cba47c25cd10ca16fefbaa5fad9336a9feca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "b662d6da757ed4719cfba4bc7d31511e5ead20c790712107fd0d91f6e379def1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 158208, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "dfa98e67943880403da04f986dfb0f0c57cb187db7197de166848ff4cd6f2d4e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "b06e05adf6634965a4c148ce7bcbe2c37c959f9b18dd8dab38393d58ddc59d0d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 153984, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "86418c42f335426ffc922b4e51508bb4131723778d79df9cf6e0abc4be9ce01c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "ac05fe2abee81e5d902acd272df1b807ce4e6e47b9a3bd16a542733bde2ec7bf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "082523f7f1eeef2f77a8fc5664a35e3465f52d1613f1852886763c7e9f6530e9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "d247112b0b88b4cb3b14721f62b7fb870c205a32e86c08a3b80965c1c228b8e1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "7a767b939f735741072e76ca589eb169f2fd57798a603caf7374f12ccc91fadb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "4576b511511e23517b32fe3150c8dd0d653ab1e64496de0b54b4efd7c9edcaef"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "d1d9e848161feb7ca99fbb6d2b4791bb590fda0cc1f51bde1c5b5269724bf71b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "80c2d48b7946628f654bcc4c0d37529170744413ed7c256f93461c86359b99e3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "2ce840f9864f26661c502d5f12345568731d52c26843384bd1e48c6d9262d64b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "043eb0384f09a7dd7e1406d284a1fec0a4786c926f00d9bf6790119963ddf129"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "e3ce4090940a48310e1b9af7956de380e76553028b92bbe158b0228ea1c428fb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "7daf80b36c81c1d7c1657e622ba9565753c4f7303c46a56bed3062911db315b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 157088, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "6759ea35c9cfbcff14b1b9cd2975fa31990fca1d5bb7457104f90699e4211c13"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "727e6ce683277d07e8f559087df40cc49901df7e5a8d02844e838a9d0d2d7b14"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 152992, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "ec5cbec6dbbd8a8e6a728c5bbf44d9e34a5b744a2313a3ee22a13278e3f2ebeb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "10990e1e17faaca2f7b3ebf3cce60fb0aaf1a2f90117554506bdba435768c163"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "6c7fc39fe5101e1eacd0b57569ea5d756b1fbff5149976c0c7e9be2a86acb3dd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "2203be49d2bcbd997f2fa4f2c6478b2a6cb232e0f8eed92aa84b4976a0b58b79"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 158208, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "14d33b891b3699a50492bf18584249909b084d8b8e7e5af1e78a2702bb3da229"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "4fb3a828286f8cc68e3336f361814c7604011f5b5c54d76225a6b225daa26c3b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 153984, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "1a7484646e96ce9bf5ee3d2da3740f71bb967473f1bfb031052975e6d19347f0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "6536ff2823c20da0aac606ded4b847466b05460b01716d2ddba41bc993d96a65"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "dfd77cda28bfc5919739ca248d9f4a008edf1eb63304a796c316a5908fabfdfe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "a985503a1b5ba45cb408bbb0a9cefbbf96ad810646c8c5d23bc7b0588f92a669"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "b6b7d5f174fc3568cdc0ba793aeaa5177f0c758e4526b6b6a856989c97f28287"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "728669688b1dca1bdf250060078a1ff7a91024622a14b77bd88b8194d7c0166d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "5432ee6b7b2b94b52d674e4902fb251a0a46268a1c3e7fc5846af422f0e42c5e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "cee584dcbb049bc380e4e3ce3511ea0733d61db90ff895098837f6cb3ae4b81e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "8a97f0ec90d813e718bcf47451c6f27766d25493afef83295701bc880bd2de6a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "b39c6beafc456c6c7a9ca85d81453199d3674214fde5da4e81281878cf7034cb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "f02850e811373b00884e1ac77ffa6056eded56fa79ee09100b00113715af7c6a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "371345b4498d30404d1586b35f332101699869d88036959967af88695ee663f4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 157088, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "e00bcf723573cdf3e14aebfaf54662daa95acd6fa7bbbb39ed2200e20dc4ffca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "38dc70aa3a3a8ec2f32e951265b1332476921bbef26719d5b59dd74caa52143c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 152992, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "2dbe6f6c51fe99b3727800ec29e46973863f75482f4339d9447fa13901a653ff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "08e3094c74d1b18848d2651dae683e45814af1848278713ba9acc782848a2736"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "97ea88716402b8b9c1b7f411b2ac6f4daa4872f824c9df48031f6de97aaf4134"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "7c1e1e6b10d88346718abc6dde15da16cc34b2870b2f7fc7e58f7aac999536b9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 158208, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "2b8c1d7db97fa877fee769a24f19f5fe459f268ccf228d08264f597d0f1f2fa5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "f000c803207a7f03dd14c997ea05460d9eda6a0cf7602c3d88e420a1b55a2aab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 153984, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "72bd399638318e7292fe68700d8742de2f5e97b17f8dcf04e4554056a07b20e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "bf538bc3a66b333bd6fe5340acebe9ebed53fa425b9e07f7afba055e93967754"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "48c11a19f45eda9c53771bfce805b4984a4e9d663cedcbaa3cdde095cf5a8aaf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "c8fa321f0ede6ed7d3dab57e82eccfd75688a4f0b07d85be21ce4580477d3e4a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "9caf009705aa88222cf29b3baff8fdcf2496226f4e24f27973ba564399eb6b3c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "d2ee3d0881d9e8b5c350dbfa1a8ec51f41e8f2a0525ab2389c08e2a063eea63e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "af4c2be3a95cd7b5a8416cd61b075f1f4ef9394891d73f7f28433fb54efff278"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "c4b6721310d3c9ddab4d8c050cbe2c5c3921acf708754b19dcafaa272aeb9db3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "bd423da7c1097e8a52b1190b89adf2b5f50353c936a8d398298aa06b10d6079d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "e5753f0af3cde7dd6b5014baf182d8b106a7474106f3d7bc2a7962c252810ded"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "a08adc0d9e4929eb95151481556fabcc2b93d4e59f13c31015295024cda59182"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "cdf6a67c465b405f07525a3b4adb2882a897f1457587ddb566c3ecfc1ddaf77b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "aaa7218a9b82768fa546213e8974a43265730bd9d266bbb6482e3270b025e96d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "32288343e7e76916dbba11b69f4bb0ad05b9b88ea13bef9f53612958a36c902d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "5c5da362a0aefc3a8b8858a34ba96a1e4d62e1478b902e632fb7201be0245da4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "509defb711839202e18712181a9415906738121289bbc0701cab923b1279094f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "66155352914a4d10ed3199be1c98a1408d474e9cd0263c2fc674f856f82afbaa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "9a51916c6f4fcade846bd15ac1d0f7da570d7acc9b052a800be6b97d19fbb228"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "7d88ef7894cd4856eab4a4a6fd4fc456ab3142b34aaecc275a729009178cc056"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "782c5eabf8657154704182728dde45c901fdaff547c9aacac9e5f00dad3608c8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "729ca3550a4c0a62075336d0c1b4e29e81f4728387e3948a32e8e64019b14d8b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "352ac524ef9ecf862617e771a5b23faa79fb1c14bcfc2fd642be41b54bf55dd6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "54876dc8d814edd6cd166bf6f088f018f655c7ea1a353738ddf86ed033f289fa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228424, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "fd2afe05038b88c99ea394db425573422ea8cc69d2e3cd3686ce57c03ccee544"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "fb274f6192e2971230ee501dce830e4cd743347768445b5b756a9c16284f138f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "5e0c76aea402e2e9db5cea1955833c8bb9d4cbe540b00414390d76129d10be01"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "9d1e3a1f336d03b2d4b536f0e93cdbaef9d3a3fa7c481916e0082ddf40052ee9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "ebc2b40a5b7fecfeef0edaf3a2afaa61f49cc7e67f8a60311b18160801d4caab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "c49c54f985425506c70aba66641bdb791b4f2c38e3dffeca1e31fea0c463733d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "83fd1b44d108f2897d35813da7ff99b80aa25130a757954138df38abe4df2302"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "49e92a18fc6909abfdcca7e9690a35b72dbe4ef25eb51ec9fd30fc1b1bd36d77"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228424, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "b5693df488595d9c658e5bdf2adeda999b5f2d518fdb995069da73f60b807eac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "77d4de432f893c4f215f81c7d54877c35acb5821fb33821adb3981af6fea764d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "908061e10eda8e3715b73622b14807a5912df1c6982fedc6af99653ac08a2f43"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "6e3098a635910216fe20196c06264f12308fa08674892ad19982f42cde194d4b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "68466a6b8b648c03e4ccaa4b4cef8f2170540f332511312fff1d5c1cf28084c2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "09403ebd4d25ccc81965d3ebf0cf287a35a98aaf2dd4c4eebcd658a641149ae0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "6d651fa2ce23d8e401b124be10f8256006c7df8f2c27c4683f89d20d1936c9a0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "52bf18ab6aa1930ab719d2b3892966845f9b05a2a666c1dd3f661b9d3dabd8fd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "d0cf95b19de5808871be12ded576c2536bf07b2095df069299e1194ac5fc4010"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "4981717de188e7a66e308702bcc0544c61d6911ff4ba2476166a9dc1488c2aa3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "d1c7a3296fb03fe45c9fb7a4bc07c503df244e79e0a39b809e49151039058171"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "45e981e963177724fd88d86cd12381a220567d5c201ba346ffe500ac8e0d17e6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "6c8a12760f5a056d5ec52569cd276bbd5bf3801be9b45eeb5f9897c8b590e2ee"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "24e92ac3c53e826aa6f3470322289eaf2e0918ab239ea4233f758fb687023020"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "98f3dcdd32d7765a00503484f191802f685c431929c0f6833e141ec9f3ad4c6f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "0bd81e25c1dfd7d2e73a75bff137851f70c9c59dd432543eba1f53d52682b388"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "7ada5369c91215f2f4310846557226ec369d0e9e0ccfc9445c4529bfaea9765c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "790b4fcf563ece3922cce40ef425c22229a0cf711c9059c3519d44fee21ccd6f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "dbb788e003499ca49a7c09dff378490cd583be3456d8b5cfa29665adfd6a2dec"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "53a316c89367c372886e6a2e2a2c4a013c2d55584c0872a13053114b21e2d807"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "2ae3e3caa01a1f96c1ba0af846a39d54f857c3d229267409544da70c8439077f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "2fac131575ab379ff53688cc1e84cdbd2d304617afb0a3938be5d4cedf5d9814"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "91f120b25dd8e0c1906044dd0330867beec05cdd0f7d34ad69b1dee3146beedf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "1cca0e04f3eea0263b1581f3e29e9e397a70a228ee80222ed1aa4b90df0acdd3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "057c59e2536df7126a5044a84b50fcc113f7d6a883ec943e5b536a50ef92035f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "f05a512aeaedf2525a87c525a01c6b4d99154505e29670c276ff79717a3d6c83"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "ea559a04a65a1f377e9736834ff4f3f4c4607e0822a1e10405adc7f5be05a4de"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "9b91420b2ede9c892321e40e6ad429a3a2757de223f63ad9aa40daefc1f05ccb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "f00b9ad6adacc581bfef6fe87ca151473b6d4ca6176690a4707b0e018b5afab2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "31d88c4e261c6b57204492b166b4c289b2a104c6037bb808aa38847816fa2531"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "a82fd18afcf4eefcb7cea15df7ceb717e336ad9db033061e0a13ef4aede459f0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "e1a4c334dfab85a2348b8a6470fc848a70ce62830623ccc045f41e659dc92d32"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "636f13bd2b1245c0f3bdb542287255e1996a4d7b46622cd4af2b7c50acab146b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "998a2d6972880fd5bd064201239e7f5b96fcbec8335e5ad98ab15ee7556d6eca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "f236f762c1dedbe86312edf06aeedca6da5aeaacb47652244d12d4ade5e1ca67"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "de5bb5303e9de618e2609a2c526f084178754a43fb6c48d6eea01fe9a0616093"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "d14d9ad08504a92158680c24381efcf78e0e14177107f63a8c6bf43365aff8a6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "72406e51f47a45d4af32035e383bbd79cb27b81be06f2b7d8d30505bb88fdf8c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "79450b8c8cdc778a698c3ccc8a2d7bd287eb561b3c57644cdd35d4a3363d0dd4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "4e215a695b49f1638143508bb52e3e465d6767dba6c02c78a153411b614a1952"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "5c67ba69d9d5b1e3b7801c6e13e879c8761ef9e2c5b8e10157cdf3ae1eb064d4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "fc2eb8453c345c0ebac9ba9b235654f088172655a09515c923c1d1a08a8913c4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "89e78e11b866bedc48e9470c5dfe19b3ef31ceade6b2c5e47b6c32cfa62f8b95"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "1f65d1816984eb7b30c093a611d568f030727fba459fc76df3060fe89d425bb8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "eaa5306b673e5d6d8742c6a41c0ee5f69f13c941469b9b55a8940a673c57d9c9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "b836ac0a65bd0bbbafec03ac70cfd5ae1c28308bceeff2627dd37332992686cd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "d8f125b03b7ab9f8895bb751c0f06463abccfd2505b13cd6f66b65c67c768d19"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "b7bd73e07500ce4aabdefe2d0d8963abc3741eb576a77b102af4e2b5ffb5b16a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "a0213b08c0aed2bdf00edce1d8e6f680224b195f657ad54f7d09787e02495264"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "db154d5340e477786d99813ee1965db96b4cccf7f13f5410d615d8e5678f0bf2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "6e75a56eb4ffb2c307886298822a1312e77f328ec279082ae6f928678a83ec26"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "a493398db087308fb1b3dcf3b779ea1135c3669227ec3677e2a2971599db8a74"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "f2e13488f3d8a10d831b0f11fa891886225d675da0f6eb538e3fe4813a36fa3b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "af856f74506c44f698a897645fcc952b8b7d0c891254239752547805933146b9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "3e264d41fa84e3e62aa7bd27f1a526b5b44b143b89ae352ff080abc352aacfb5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "7fa2e0c7b5f26abc44b6c4ec1c1c4df7bebef222887e20c84a3213f47a3685df"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "26467ca9ccdcd8a274a0fa6d5e0b0c4649dd489b5e14750ac49eadc3ee709678"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "7acf539dab338ef6f086637805aa7f04e1377b55a8bd4ee1178599987518a5b0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "6113946d7fd441704e605003cacfd550231fa13834d756d3362c1cf9b4b4c836"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "c7ec713e82c09c6a08c3d5198addb4f18cfa98175653314f4138948c0e282237"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "507ed90f01a9a118637e6320b168c2e69622c2aa8891c3d4f94e91b64db33a61"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "61bc74277539a2a32df8a6a697b93ebaed8129819f80fd12d50e613941e64abe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "1fdbc0b2b8cc8c3a78bb3c50e64e2bad1f5afea1f26fe0e08383b8a500ba8125"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "b6235ba26a9ba7b9476f66aaa72ff86e6336b72eed88b441150a0ba7ba111870"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "3d03edcb17a1476209833ec52593188d7a71013c7d9a6ef1fd8dd064c0a1853d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "62b8dda0fd7d04d6e0e68ecce38ab65d117fdc9bb7e4a2a66938caf62d5a45a9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "f952443248e8e47948dac61a8bfd08172a9c48a03f74fcfa7894be0ddcfb8723"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191672, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "5bd6b2d48532aba41c18cb7166737934f05588515018ce6a2926a321909784df"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187064, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "356055c297e48dfa62e9166cd209e8ad2580f9fa2acd16fca709ed2d9aff682b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 192792, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "dc2c9ab2a05307aaee5054afa23bc6549fac4042c2b850022944718e5a9a91fa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188056, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "65d024c23ceb07ec6400745920af48b5501b246994c3513f5506c168202d6447"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "b3e0e7bb213e44c14762750f96458459641322cafe85f756e40c848bb67bbb10"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "ea58cd931cdf472a7d52bcf7836876b68371ec75234b9c8bec735fd7b5b7aead"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "574688c849c9e73ff924d57479b4b32d2a59d761e5c5b85ed8c72274afcb9fcd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "1ee66c5e52cb440f44d7f702e880f388ba819daf84e80cb1801714de91fda32a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "094225de8e88136805dd8d55e50a133ac5b4fbcaaa7679c8b3b6b7898525633e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "b11419764039813412a67d446b6dac5bf9a0775f4e9ecec546c9f4e594033c88"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "fe65b4ca8e93fd601c603b377d0d122fdfcecb3c070036dcbdb9868e2ea8014e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157872, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "46c59687eea2b3f9b4e4de7b7b1288fa492e10e6f3e7b98116b6b078096eea1d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "bcba42ae50608543620425ae88c5701635a57f6f8000532a0e73c18f9d676ffc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 153264, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "123b7337d26fc6b7378d8c8fa6db5623c143b5c64d388359c8dbde675a153a67"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "f0b21cebfdaeaf70335ef54932f00fdb55396f836734075c1689957e21808877"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "e38c147be290760c404a12523ba27f3c94e02535d36beecf01aa02feece9bec0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163184, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "1dfe0aa3e24191fa3354bac8bf0cc3adc8e541b3baf651fa8e3172ae5ea389d1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 158992, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "679f5833c64689a979333a66e94867c31fa4b4d15eede66a103dfa3797e457fb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "3ed8dd122225d89d0e809e71fa15c2e5f594d1dd0d05947a206f00e7d45ac411"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 154256, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "3933ed0ced68a2922ce480cee2d948562d436ec17cd6d5e8c3230fe13da851bc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "2f59adb72ba1d60ec2d335cceebcc6297074e07948965fe3a0a1231ee5634611"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "5b885c036f814f03cdaef6c0f9b7f4c1cb3c8a9dca8832e32b19fd65517b8f32"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "7ec832640874be2ee7823bd9347e7c4392cdfaa85ebec298ba622e83bacf5b25"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "84b4e916fdaff2f370ecbb443706dc92f69f5b97d08a000ac9bb2982f61b11d3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "738ecd63cb8fb1e06e258e2ec5662204f87c2f75aa06d28304d4975a7111f4da"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "de10683aa1d1adc1ab0bb09be8158e8409caeea42e5c022d693d7afe3efc9c2b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "345cde39e5a685f500e94797c7dbb2b701938b711effb8938cf0eba3e4010cc6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "0a70650e9e62784409fa308d19182c4d10ab33bd565532afae6048d64216ede2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "c228cc1b135843328999cd721ae5acbedab2d942e3f64ad8fe7cfa580e4b1d38"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "6edb7781f63cee8e38eee93be714b1c133a517b061ebf2d49d0fde5924609d55"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "986a84957181b98f774d901fc5fcc7d35250e8142b312b7fc73613bd28716138"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "bd6f2d79be0ff4a0655a213088c92b755adcf82ad98660763a8b856e090e2866"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "13da2eb20c8f872e3076ea37b61c8a09e0aae149b01045674b2f05e9c190cb30"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "f0cd7445ab472667949c6951742ea7a81bb4ada05c206933f388afc7555d2cfc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "3bb6d96d5c2005e9e107a8871e02024a37de16d4e559f334f5dc7cda94598912"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "ce87c738e0fefb823e36096b362973ddbe15752ab2a3d5cd06e08f43daaa6e73"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "c5291b946b3f3d79017a609cbddd9c30c9bd12c9ab455d64c51ea2abead08c80"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "f30f6425612075338cd5cdeea9a563a234bd2f6dc34d5e02aa61bfdc5778597d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "fd1e2e914717cf4c3eef4cb1fe905728af2e9d63b4d92781fafa18d0906e1192"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "dfe3d13f16cff7f5012bc3487247a9f16f3fe680a472e9719c6e61c082f3fd31"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "e7edbeeb99555b4f601f839c60a3b8a9206f33b6b7b99e0739cb8308d75d5043"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "b68a6cf7db517038e288ee882d773ab96b88a9c781e6031ef44317600bb77162"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "927f9d934f0a0bd2c2b8fec7dcd5711b76ab4bc6fb9489b1b9c4be6125b9a1f1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "9b572f414b55cc7c3f7a580a34429b72a00208d3eca072188657320a7852406f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "75a5424207c3dfe5a0a0a9bb0e32007ebafac8a7386da848b68ff1756e2b2fea"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "569468956f5bb33c7dc6c8dd814ab36c27f05e934d08cf90c743ea66a6f193c6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "958b53a5b5d055b0e9c960b18a8508c1593553d8816b24e7a24be2c146fcf1f9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "3f7cae9e188438cac36b890b64a1db9727ef4a674c36c04e67451d198ddb0e4a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "d77c17a7ae761f69868c43b256e0613715b698801b11d88b63228b728a0f9d9a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "508e15da8f99d06b1b4326144708b4d3f69f3dd6749fdff549ea8e214c61c787"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "3c2529da8bcad9b642744bc4ea94a65b427c7e673d65c1df7a839b8be15a96a5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "cf25addf98e9981f1095a38f6f6f50af6c5e0ba73f7865d3d4d7752969498401"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "0824f7629ec29c784d76f6860f471bb0fa1529acba61e3ece07c15d42cfcc4db"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "e910142bf208ac2145562d1ef1f6144a748a96da9e67c13c91e407650223346b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "2e2ef289ae847bceee71e7926fe1e64b9ac45d61c6d2733000245aacbab7a429"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "1c52cf22190730350500fbe18f23b888bc413798f6085e89202ffde8e6462f3b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "df098fb35dc2be0a5a51e89a2e761ffbd27e69efea50921dee1c2d41eaedf607"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "59a6daf2d1d3f130ac949cbd5a5cfcb8dc2be368e6cb9346ef080a63398c6708"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "3b5e237ce89a83c0850b8afec9d84c1faa5ee768120514f2cd77e93c6472d97b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "7f11eba7fc8531bd65fb7b3dfd53fce9bea354250af39c08f58a5738d99c25d2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 195256, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "fd6d3993055096808973074a6a4e192096d86b95e2af1551b532e62d570b030b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 188600, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "bc54e4552ed40332b1edd5a75378aeb6bbcbf19ce06ad9e46d9a8297765416f4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "df3cefe74b5fa7983008056962d969706c7b552889bb602e49b0fce81bb4649c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "8468bd8baa74cbc69e0d252b3a1f70069873c97ceadce730774ae7716b1fc842"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "758e4e7e3cd5491cb5698dd4ea8105e1765bcd5bc4550b426edd9b569bca69c0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "a4f3605632fb1b8de1030f079e2b5dc341dee5cb38e394c2626d1fff1186b8fc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "e7ea83686fb8c14f675eabe462d5e94ed62cb2db5f501c6cebcc072fd21c5311"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "3dbe5d942ebd9b6c70372a5a3e46b55028ab8fb6218fddd20e6193a315d9adc7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "12fe75486eaffbb1b3571b4bab5589b804e47ce9ab89c45b2dfe46d4e92b7af0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "710ec7e2bed940ad7beea06e1405aca492a81561ed9adb134701e95b4a786c2b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "ff7a6370b355380310cd77e894ce4dd3bc07912e6afb0bfac572c1dcedd6e5bd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "d751e0bf9da40c2bd46f5689f43c493d6d10046b65a9fae2e75bb2a9a32ebb63"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "1d7ae7c04449f4be0ce86b170c74611421e58349b81acdc9ed5b86c0910a8a62"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "9607b69341b623addc3c02ebeb6b6893feb2c514b1c2aa19ae269b4f9861994c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "82789b6f24a82c1a18920e02ec1fcf338c63d92da60984ea910df7abf5f4f9ce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "feef1827f52370f1098566103be10329e0f72fb07ad2fdbfff7ad2a0d82d2d3e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "b9c3bef38fd6214439cfa49dc89828f49719fafcf07620ad111a8bd7d681a925"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "309886a6e10f6c78477a3e8da599a139d2ce97a430fedf42bd2e2265d8b9b28d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "6bd441170287c14c6f3b3e8d25859f30f0a79a7ca0016306a17d10e05280c511"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "a2067c2756a2841c5f8364f5ea9eb12216677c654d08021c03319260cc900922"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "e14d25afc64fc1c3d39458f80a72bf4dd912bf10e998458cee0a8e4a6516ac07"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "a0f6b0326a364761fc1221553dc69033bda0bd41e19514844d00204cebe8d2a5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "699426ad9029684e95e71c68b7e6a120c062b39c918f62d2a66f9a84a5b1d815"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "3ccd6ab8cfcd60fec9d4a13e19b413f768cc41d2a01fcdea1bebec231109432e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "5d3e15d3ccf6081ac6ad0cd28a24779ba0aa01f003d0d94e51535e9bc17f21ea"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "a82ad1d0fc4418cb47a98eb713d2f5e42e8042a09ad1e6ac184083eda803bfd1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "e6e7c72e40c40141ce17376d70082e919e925b6f3f2569ec0eed061d1980c987"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "56b19ec5446148f467c141556c0997d9f148232dfbacf9991466cc3e606ac676"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "f8215647feddd9cbbb78a7fa33424ff19218099bbb278b58b04bc36e5a4f9010"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "9f6b48123291b1e981bd57497b8ff24e792f51070e841200eba73da5ab54c2f7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "615e63a20575ddb3317e26b131b4f0b9c76f20a55d357094dde3143300023a1b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 191832, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "4d438f605eb7e4c0edb15d9755ce14504d6ad8179aff627ae8d41f907d2072de"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "365500f2aca202d5c7e1560fc2cb9198da185d9ba9471faf30679a9ef71203ec"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 157008, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "ac328987db0323aa138780198365272c87e73ce72e0e538318364e0a7337cf7c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "bfb79da390a397466a3afdfe979595f474d2c90f57314acc679109f3e9724522"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "81337123338fa4479da555acb86dbd4503fb7b8d3277ddc068a40b4d3dac53a5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 165296, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "915f41d30eaf75b356defda826f0505f15030176b4eb12b9f058a9b93c10f4cd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "9848b6b6bb38a04da98222b972a74d612bb5cdb1c2e4b83d9dfb8f28b4712ba9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "b869279eea9989ac692f4b0004bdda259d07c03eb2c7ba0747956e8bc449e34d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 191832, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "ca735703b9ff5b802afd5acf7f73fe778f6e7b0300f82e39021f31acc235a982"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "dd58c878a956fd21d4223936cade1ea98ebe497061f650d134048a738f4262f7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 157008, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "6253d327c291c5acfb5218d771789c49c224986ed88b2c7b7b9e13d273e19a9f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "9a41ca66aca585491e30d91fdae4bf2c07c5c5427888f5d398010fd059beb179"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "94973b992e42591d76eba0ac969f88c604842075a8a612ea006d2f6ce6450a48"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 165296, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "1a2ae1bc976be2a8afadcced4258cf024f8b7fbfcd983183c864ed2741087e52"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "34189d61c7d7f50bf78f5e0afb0905a6a18c1ff41f7ea92942c30cb7f93fa83f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "d2f230d3298c1d5d67db3cc785be81e77b37616a43d0bee9d8ba8d14ef85805f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "dc6c4adf53c7ca775e78f8e1b19e971ee7b0cb0bb0b2b0d8414381f46635b47d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "1024d06044e1922dd550f0c74fa25b58d26200104f03c3fce4e095d4bc8c4686"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "fc201c21b77af9cfa319379c339fea944cd10c5961b32b913cd33e5babe3d95d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "a75ebc090990573336fa520dbc6fc56963b76a5b554636ffd1b3754f5269db7f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "fd4bc168c378e9abaaec9a8d681cd3e25246fc61d23bde97a8eba5f9e4b2db12"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "cdb833f1a05a4d900b0f48c5468720cdd083d4a7aff5c0de78e345d8a248913e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "df882b9d1faffb23634b409f8b2a6055aa54db5b2557262782003948c4d5f497"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "31770812c13ac12ff4d756cdbe22835e70318f3e638dabd933934f8193a46fc2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "5c8931b9fc95821fda64866bd1c63665a4afab81f2546fa4d89c23b4f75e1b40"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "4b4033b8753d7b9de7b64f2ea96976e4d678e3a284401e425fc93cb4cf60834e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "02a345b8a52526d52b7f89ae173da7d6fe8ae39e2afab3affa028c08fabbf84a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "92badc667b1294b65f9224ca7c97b3dde9ebe867a4457a8c48b02450305ca542"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "7ad7d3bb3768cbff360e2ef1245182dfda007be2f63887beb5db185b9187748c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "486fc12d6a73fc2061709583045bc87f3eeb24ab9ed6b3d34198241fe6b229d1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "c561646eb93a28c4d83dda8f1e4e30dc8e649f27012fa1e7a79c4c9b5cdeafaa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "ebdc3cc087a33ecb0ebfad2b878b9eb13ad1430be49a35efb89a7ce2db14e3d3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "300cff02afec6a3c76e7dfdcbab1e2947a82fe02015233a4a48b80fc7eaa39cf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "5167a69abbe66aa53bba6ca2ba7e77b43eb330c29f7e2cbe40c47559dfcb5411"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "244ce00cb8280f5a12b74887512cefbc0cf083f1c447a4ba19946900547160ba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "b7fa55e098b232613c14bf6a4079c9e58d9b6a7cc6dc1db799458b8ced37971a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "5047a805a0861500e48a3969d365f8757450b84c3e15805bf8f7d0afdcf8e400"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "3542d6c4a1a734a08b1868aeef08eb26b8c84e08e95cc5d71a60dea900fc5dcd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "bb6afd732256c5a59a7e7c1b34f822fdf5052028c9b6cbcc03503b3b3c01b325"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "ce8a88de5974b630cad224cad1685423c9b54e72545a125e63ed6217654f7e83"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "097e6cc66ff509a22d36c451dea552da309a1aae350e874ed60b63c9998f0012"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "232828ad85e6224f5d8de3f1d5baedfac1d282876f06a07cac720a8f65533d01"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "28339a908092dd452b611d94b9f78528af7087fe4c6357100d2f88906ad11886"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "f74a9d9724764d5a7ae3f31dd45095c06054258bd901e238edcef3829c44156f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "35009c5f0c9fd8901ed64b0380c0eeff3a32050ee2c6bd5cbcdd19b8d4217c03"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "e8533d8d38951f066e4ab968007a46b092c041cb4e67f6384a42b6dc615e3e6e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "759f3b6ccf19078458676d06b2a58a7c6d88304da317f23b3ea4cba86783d0fc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "593cedd438ac6926d4f3b94e59c45d0434f13983bb5d8bd02f34d18b0d6bdc0c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "ef3a8e0785130db3f9f183c6a99d1d1a08f6653c1b7139cdfa37857c6903cca1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "c487bda0bef3ac0727b99e69e139f2eba8886579681f089508e0f400514bdbe9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "995ce9d0d4eab432bdcc75f421303fa9e574aeaa1e7c9373199846c99b0cd946"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "b54cf3744b2914db6f1b49da4fcbb6b2bf641341b23d890fe872fe960e3e7e1a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "28aa17e2542cd722bbac989384bc2978a8653b693bc2ede77c7b521dee9a8daa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "ddc36334fe0a14d3c108459b0c55e94e9bc2f95670f157301be3932c433f8bff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "4af3a070e60f6b5ea510151a3092fc25154d85927b4c57405649080b56aa42ab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "76a6366e099aee724fc5e1e462e02ea4df3938c379321f9e72a9db34c7527303"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "e167ffd6a46fe13811da13dd9eac3bf475a700fbf09da3924b6b6783c3a39aac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "44ca826c28eabfac2b3a0b13f789962cb80e9cef7849a146fc87b5790341fdb0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "2f37b486611ad2d88876a203b393d53ad6a4287d0a727df1ca1cc742922bd884"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "d8b2a77a162d234b2326cff7b444342b51099304682ee6d35c3a09d8d475619e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "d845ccb6882ebf4e1780a484f971191f67fdda0646c99387373c4e6270a9d8a7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "4130d28633d6f6ee93c5874e15964447d000d4e1c7bec27053fd0314c27d6def"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "5edffbceef1b73212ffa3b09f8c96b1f9eabadcd06ae5398e314c2b73c528b31"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "424bf10e88d13c28623c97f56aef5f04cdcd98b024682a1f800fa698099c35ed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "cdde9c7b35e59a7669b32dc7b672f8f38e5a6d2f291655c6907cd400d6e5898d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "6a25169d4475cec2b90dd17b4fe7ddeee73695356fabd160d184afd5a7d2ff46"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "328e69d6738cdb91f262564e3ef6936851ab90c5ec4c31cc557e7f856a4c20e6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "f84ad6740d92f03a74cf533499c9d3f66f44105dc5808fbd49d6e92b772b819e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "c019e8b51f9d30feb768d721b235b12a9244dc5e444b2a8f5f658f1393d189ff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "589da41ad1489662ea3fdd2cd08e10b0f5291654499b6cf297172b631bf6dbd7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "38ad3b2356abfed66b9f278d32b251ad002f6bb6173d80f9ca90b068c6709f5e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "bad0c4d68ca016757b5aaf443bf5d7068fe395ae2f0c2b85f6d1f43f5f3b80e7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "b08ee9bf027b501f6d645255c21ca04a7b729557fec3eebd633dfc773563133c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "a8ace7d514520f2d95e9d5d3a35969399fff12f74d1a07a52d98b6f08c4ffd53"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "4a7a632c2d9d7c10d4e92323d0b87e36564dceaa55e73389d1353d85b9e98f7c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 190792, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "1824769515108cdaffe819efc0987923019b1f897a5286a0389d5328e7507866"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187208, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "ce7bfdfaef55860d0eba543066c63b980d39622ac48b8e2cecb8e8a12c068ed3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 191912, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "bfd0628123b831657bf913a08c1f0bb06c7d2cd393f0b1088dd43278cb9226ac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 188200, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "7603586fff31b8369c0543994859315459926f3e23d50ee8eded8375baf5b448"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "54cfad01d605bd88e8e269fb675f058d6b098c2a4c33235700895b43dc543c56"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "89da70d34b56a27f0cfd2bd259dbbf0f64c3a72721dcd5cc03eaf006300aee0a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "0dd540acf8c80c843fd8312f813750c47ba9b5a7a755742936cd4c059e96ba47"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "4beea932dfcacc732a095e4f404f29e225525fd8f128cea66344025358feca45"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "de82e0cf4d19f44666590156212930aa8f28c9468ba58b6c12e356c186a8d57d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "34d31084b89fa1e4adfae38654f0c3aa2c05f8be0e5bf264bd71525995a835f9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "84020fbb44e9e0629170c7a59c221b3292bc01edb75980b60331abfa77795a78"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 155968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "f45941bdda52feebd6476d81b81a179292b37b585d3f8ae983cc74501df23f7a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "8ae060fa1bd06692d50e3f47eac5f24fca7f31273baf326a18763a5dd3d1adb7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152384, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "39ad3c904230584267e71dc1ff63e58f40fba7b480ac22316b87f25d6204bb03"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "7724d98151f30e974bede7c04b328cc586e55b0bf74bff8d6ffb34b9dac81b8f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "610385ddac5b7d0f1bb3797eaa56927a1a591f3d9de0ebf1f7ec0310a4a89887"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 159232, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "248797e5485947b6ae9f794f985ebb9e941bf143fedee3d355df4d13e22db3ba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 157088, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "5ad6ea88054396f2d6591947c73904c2c4508fcd7e424d287d9d83ce703f8860"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 154496, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "9a9f0f5887f08eb1b22ad0ac41b3abac05c76a7646d52d4e3f9498f067c82050"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 153376, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "322c237a2d03be59cc5ffb58b3afcabb234239076ccd64a719a7545de93abb0d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "74e4a120e238fc11d81459b70b4966e9802d85ee109308ac17211d8dafa778a2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "d3a8b681d7114c4b41cc6a0369d0c282f1be69ae8cc1af49655a90aebccabae5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "750235355c42946f0cf6dab8091459da837d87373c8662e0fbd294a0e0d5dc2b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "a2f540c6681d4d8a013b210d2ec93afef8571e4e0c56664bd125eb077bedc880"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "a6eeee3491259fa0d2aa78b9eb579b62c31295777901c6acf02fe0482a189450"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "3fda90662276c0353e320a6e4feb8acb0e24e9a4d2304a4d94197e548c81bb80"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "ff2cb7cf1ff91bfb68d3dd2fd3769a3b56b02ce44d6b9f3066fbc9f713d5b896"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "537e9f52f40c7a492b65a552dd43342df2ea3a3e60d5bb8348a7f836b8d1f1f6"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "e3bddf5ae4085cb923a357acb4ebc2c4237ef2a00d2954c3667b05c3ae3f4cdb"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "419791013ddc16eaeb0f649a26fa3acce49b6ebf82b2cb40bbafda07f5ca836e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "7a4beee427a44636b6b2dede70dc7a388994a0e8a46c02cc66a2f41a92a79f78"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "bae946efdb05074ebd5c6e4c567f261ebccfdbcb07a6491f09db6ae6d8bb0084"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "d30e1519fa765d7b4ee2c675e8c88eb03d4837531e5eddb2491d3b5b74e25880"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "7b7fc01cd119b259e9653916ee2f0087f3b4d9bfd888de7d2363fe726cdf0c10"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "4deef58c1cec6b55f3e15e88e78eb2fffd66d31b44aa5e9a88d0ddb7b32cc1d0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "7c79bedf7ee28a80321b885a81b91aada1040f6be74be2c732e674bb69a1a94a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "9195513797de7be42963e6ed8e1e013d7000cd4b5d104f1141baa083fda20509"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "b5583172e46ea133e4c881174af5d2a5652b6d50ad84d61ac916d67195141eaa"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "98cac8ea6604181285b4630e6e9ac3c295b3119b3651796850de038cfc7360fc"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "ce75e60964bc588cdc5cc81d43b3e1fa6e10d21eb5faeb590235cef13a945c68"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228328, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "c99fdc7cea910270e005481b94ad6546bcc2214d3bae98a66908fb88070f7037"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228344, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "17680bc4ea344b91b4b2e2e18c5e87a7ac023b62345dc74c9321ea11bc8b6d48"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "1de857213baa6e54d5ceea9cdf40237a8f406e835bca2f8f7c5e87de2c593069"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 164976, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "d1ef13b8c3c02fda738ae1bae1346b2e60357d7111d5e41ed52e12a5868cb9b4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 197824, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "3a1acde2551d43430fdafd89bb12860a7c6f2ddb82fa712a2b44fe973d872059"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 164944, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "6fe6a92e003b38a7814d2b3c3d8902fc76523f45285342a7c410b334a42de23e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 197840, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "0a65ce33c6aaa8d285786b0afa762794ea171acacaf3d342220baad83e8fad68"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "6e6aa7611c5ca77477d8d87787a33cae9d9348280b0c41dd7198fab8ec38b5ca"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228328, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "30d6db830b1e5b3df6ff22763a7b1252476d9b7ec73bd30217b19bf697b4a162"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 228344, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "ddf63d8be64cf471e1a73f49618342b419001c1e73a67e6291d9e85ec79cd85a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "3f132a094928381ede585452b5d80cd9a4b85d49d1c5990a99fdc748337af519"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 164976, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "57896c0aaaed2e71208910ffdb720373a9a60d139cef286d6c400737b8f6143e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 197824, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "0aa8b83decd7f1cb9d14535f9c1f2edcbdabf9314fb8404bfcc0cdd83b615974"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 164944, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "770298b64b54079703529a92c7c16431750406e0f162f72374616d8890f96ddb"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 197840, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "4e802122bad5b3ae05f2a83ec02d33ecfdffe1d2795193470e9a80c652b4b5d8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "5d3f80799e22fd8aa38c6d70e23096081a5c5fabef5a7ceea5740554caa29f48"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 183400, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "84147e9a11290bd1e866807399f6d3618ca873f07b371bee85e0c1b44e79037f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174696, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "853789f68edb3ae1f812460f0eb149206b4d55fcb02d88ab4d66f67778463c94"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 180408, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "634d6dd759840d7cf0696e1aaf149d4d06e290a4b4ce320f1a74966d18df14c1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 173624, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "c45b0269be20eeb9cd3d59714a63a2ccd612343545d62c9229f24c9ef401d269"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "b082b401889a0c7f3f848ed292eec28b8e8534e2f6afa3c49d146ba568de394d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "beebaacf673a7d6af606dea2b9cc838e08037aad32b76a7b3302cebabdc0fbd7"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "5018d94afeffdfa4ddb05eb7ac82b8b2288905ad537cb82dcfade94eecd9092d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "6988b08f50d14b5f0244b4f968a3f76b6e453b627e6a1b45c11abb65d962f5ca"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "70ce24035ff8b01be12f824174da02ef9ccdf4c9d6adfe698687915aeb45a5b4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "6aac5a1eeaccfddc1eb7b6abcf6a92eb76705156b0969db8f06e9e683a49efd0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "0086249db6479d36d4cecd0d1e6a43025f8d48b2d2ba7a0d46d5d209d80811f9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "2bc6170137e7287416415bc1d2507798464a916c8358c30d81093b7f11745156"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "49b08f8ac4a147880e0f08874e45d94fb839a82fcad8c6a008cf204086ff7572"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "98301ba023a2899ec41ad3b4f48ab9fa335dc92cb6a6334e8d43dede4a471702"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "9b09263506c33277c5c3c4b147d1d2d9953c8ff84b6736b9ffae07aaa6bfbf16"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "799763b1879ed79d389f3573e9ac491858a5fee7183cdf524de0edaf105a6cc3"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 150800, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "83b09e53045fb0f644e4967f30b4cda18e8365bb54bb9857980447978e6f0421"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "74701cd70a56343316444572e9e07371f3519bbc12af687cd5dba0c294a9aa38"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 141968, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "dfa34221186a18f0f629378d865b6b1da20d5d77a1089227c5dfe74b140f3dc7"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "a98c9a9a24873719cac522bd0710de7b312458dc0ca0588728bd624bc71106ba"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 183400, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "f664d83a49791847de6bd86088356bfb874f591a8d8d27a6a81b61ea5de87462"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174696, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "4f9f2096c59d5c52148e44199af3ff789b07e71c93eaaeab5154faf0efc10cb2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 180408, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "fe2f0a85b155a464b34a241a941420fc591ad5415db902a5e7aa8dbf6080f149"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 173624, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "3e8d864d14779bdfea0941e5b678aef5cc26bc1d89e7bf75a7dbd779526908bb"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "33adec2e5a39a53351b1963483938ec4f51066b4d7d2a59aaad59cf85454b0c7"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "82410403f4fe5988db6824d5eed375449ad9003d0e156a0eed9540c12e27f4b5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "81ad12e98aa0049120b1e4794b40a6e5dcf01703a9a37dc9ba0cd96020f59dac"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "230ca62ab397f9d4462bafec696d90d200c6ea14092ec9de3e61281f2e63d9e4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "255de98537f8503d16d32ce43db2dfd764ddd49b3596ea4599574244dc7467e2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "61b16a812157bec6c2acb3848c1e4aef092712fe335f32050f2468c6217aedbe"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "8790baddce0fdacf2046c191f1b4b9143d0056df586e1c908f051878a1852a18"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "9efb723a94269e0e041e0fc1f8d25287f509f4229de1cff6dd3f5c6dc79a4b10"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "00254121221a639be18dfc7cbdcbd8666c4a054871527b003204c74e61ceaf29"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "43e50cf64a327525fa03d5edae1e672e2621662da612820ab912d21e37159993"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "756bc436440468c820407d982e5a27f2792583436a68eee9291bc05a28569c54"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "01efbe9b2debd3ad4eebb3fb4ef1e19606f1a6cab0ac88625279102151e774ce"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 150800, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "bf12b56830f2b8b7b2de69290474687227c406eb3c8c8c57ea7f7e6af2b00061"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "7a3cdeda1f5b3b2df4f568ba03450f53531570ee1f6808443b3a5eeeed52914b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 141968, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "883c2fc68529e1c1d0cc5a36123cec2942b8fcb0e4a4c5ee23f4e3d0f7a1b67a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "319877021b717404984d5daaae5ab233e6f5522af1fce4ce6584b9bf7e17fcee"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 183400, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "134786d081e87517fef2a61070c768d6c80013b20a3b3633c5d5404e75278327"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174696, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "ba20437a2d018f3bb9848df73ab113bd231e501d9f21faa4d5c7bb100077f558"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 180408, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "0d3dc6889fd53524d60fceb71dd9856c5d2bf76123369fbfc0c4c4ed1bcbdf72"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 173624, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "e30a77b41ccd09072463488b7baf793efd6270e32eda1aebf91910e489e0079a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "bd7f60313b0756a3ed479ff7bf0145585e347143eb02fe414bbddc3a3f03f907"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "e5ef098f210948993dc3785978bd585b79cc2a790fe46f158f1a1a5eb52a0936"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "50f950a976399060c1d53570bdcf0aae6b38d12be57a5ecfdb096a309fa58a14"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "6ce6d009057c12c2bdb131aad1107713e9b46d388f3546df8220240964eb6dcb"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "d12dd26f5d705237c14e637e5003d321e8af6644b371585c4cfb0ab29d19315a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "0d9405c713f8f530c4b255d2c061215b8dfc226e9905016165e89843f0ae288d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "ba8e862e22597d0909697b6866264fd3e31aed157f5c48e16dd1aa6275c46274"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "e115d807ff0aa958084cc83354fcf086d484613183e667746516806942db6045"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "4bea8a25655164998a13f63e4f3f24a4754ff98622b6d759a595820a366710b3"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "b45ed41d479dfd486e2b30c626e87386a1c2e430ad12a443f81bb23e9086eed2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "2551565273a55f536b664a17f7e69661442581c8e71839d1077cc81bbb498c37"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "61bb0be3f0c2b792d11681a5e66a61f7dcde8c0f50573e7ec76ea85c050f4d9c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 150800, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "6800a7ea1ed06b7b158c44a3a7bc6d8b53cdf3483e1112407af9b4cd7745bbd3"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "bdb62afd62ad3e63d28b07646c22750aadccbc45aa551ab4d1f88df073d6434a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 141968, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "245c85da2d6507cd00bb980114f6d560f9d127f3b6e6ee751674acef2dc3c262"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "3790c9c2ed8471093647ff14f04c0235b0a08c181e477eac95546a969d7b748d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 183400, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "8f773cd9d2bd2c6ca885d4aaf191c73a3323fa8be294e58effad837688d4df11"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174696, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "539203042afe40abb3ef4b5b215102eaa02446a286fce9b1691064412784d43e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 180408, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "35fe34dd5b1fc1d77a187f92a274b561bfea70d942445b9a555e566de4be3102"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 173624, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "8db0e1957c61cb12dab53ddeb1c3095df5c41776ea04317a37858d38b38da471"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "406511d72e974fbee85863419a7f9aba1f6c0fb5a4aa09762c789c2d9bced714"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "78f7a8153585ae6425fcce7919b0b2250c48f8a0183376ec3000724d9092e0c4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "12b1d84ac449603659f8ed40d88c473423c2cfa4ccee13f8a68d06f1545448bb"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "fc40843a5f631d92354b57749c4739fc0f6d9e5f9e0d21729e29ab3363722354"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "7efc4f02369134d9c15d42bce1ee92044777ab33498a4cc9ac2704c911a241ec"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "c76707e9ebae6fd8de81bb640a6d938628614f200ffab5acc9d7d491d8732888"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "517609a32d1cd20e3c4b54b001a7445afb65addd3d4f64cae7f26372d44c650e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 149600, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "aa1415e9f69ca02fa7207ef3e3eab061431b874d3d7dc3a8a3833a4ae1122c69"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "3491cf02707df84aaeb57cae50569baa5bcfc990179f58990936c707f7ff0db6"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 140896, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "e13399fa2a402c6b1f3520f244d078f30c892c235868e5b06cccb9ad4a0c1be5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "64498718858f346cac4850588a1a61e3b320582eb654f903ff2113fd68a3eb41"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "ceccec2a044652c1d825405c7fd5b3060d7e5aa7259a9b308b8b56dabebf3664"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 150800, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "47617489841ee92bf5e8a81d0c150aac83e6664dd23953464940438b14397005"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 146608, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "f8ea35306ad9cde224dc9f5e125c3972f9b08a4057036c396e1148ebb5e7f065"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 141968, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "1a03da270825eb7466e624583f8e10208862aa5f641cbf95fd4da6f60e447859"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 139824, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "b94a432b33d0a7d7b28587000ca024bb764ac5515a7cc896cc9cef153eebf459"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "d5dd8f0426aaed518794a37db7d55514502b638acbb3a286349545f25d3d2fd2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "b937528e56e1c63d56714a09e8f37af60255ccce0a011470430f413b8396b673"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "e1b5e6000e66fb102723c1b9772ae9049ef60591559bbeb050fa478cdc6e6791"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "5faacc359ad9f0d36fa376328b74bd93008590e9982bd97f7877e2b1aad9b5b1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "ee4425531db86acb2c8d171a31715c8ef26af6a82b152bdde55d1e38257cfe10"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "613eaa6cecd25c53496c45d40e141c1a99e599ded3e4490e03c716713db57401"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "ca826d753ddf34cc54487f73f20bd40176b639678f976b8b75e9f0865f9a59b5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "d4741fc62aba585f3786edb467f0fc2dcdaf1d914c468a847c3dff8305d35059"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "0e301d4c276493b51ece5cd73a4b7f422ac0c54f06e7f3b9c78ef6025b1e01ce"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "0251bd396078c0f08d600eec894254180685d06eae18c052c6369dbe58f65e6d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191080, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "0a364af96b7d0389be2fffd2f82729571a19f1b6d3643c58fc56364111b0521b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 178280, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "52f504c6ccd1ca504b3b750b962a3cc1403f5e48d7e68ad4a111128580e5771b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "cf0f738b4402fd384e84e67c936a1eefb3c96a12ab456916b2102879998bb8ed"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "bced178c5d64b6622e5fb9ef89d2ad90b5f110588282f7fcb4b513aa8038e49c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "1f928cfc9d4503bdc49e90e166936b339f8bf941373a13b1f9fbd0ce5be159b2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "b1bef01cfe650113c1d9596bebb4861f357342e95027b2559b231245507ffdf0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "59f2c3fc7f060e066283222fc37d81e4a4176151e8b941c30adbc1f3829f464e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "b8abf8ec0a55e71dbdb6a11843c7d4075ac22387cdbdd8b27b6c2c631331a09f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "5000cd9fc2c379f4d222d09ccf7e89e965dd5f984ed573205173c924c035b0f8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "c508dbbcc16fc1cf510f3f0cd1ae788a3d96af1c7a88b60e3ce7aad965f94642"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191080, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "b21cd55eaa090f3ad2974bcbbec3d3cf8c09891983875b67d6927a38bbea4ba6"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 178280, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "356655e3698e8ffbcabdf96c6f3257bd6b0af00b509178501fc274c624695142"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "3d54319d3f3de9365d7b3473880d93ec33288b227d24c5b63a1248d3d91068ae"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "24b433cccd90a7fb6c1300e432f0025ec19b70df9af51e629916706eacde7971"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "4c558994bdc460b33f973c6a294872c326575119cb03ed95241d324eb7e8f59a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "1592cb0cc40ceb19e64bd161c5eb019472b1c9d73044b878d4a02fefb5450303"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "49b37519554e94be8405aae4f13709748a2df69926f1563370cb31f50b80ac67"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "a68236f236810ed7b6cab1d3552b3ded4d90bca58288f7b62ded479a0d6add41"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "fc330858ac434e188d4ced65527ea273cf248d76f80c4ef21b8e1dc9c0e2828a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "d14071b225b8b2bbf6dbc800b1287a3217d994da8443327f2197e274b9123752"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191080, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "0a24e181f9b51c943278f4b323543e9214c4076d1d96227fa90baefaaad2c0a9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 178280, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "14975dd8837452b141e82669e9af706157d9b2c94ad0ee951c1de7dc3ff7f2da"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "4aa4dc762953d542b88bd5cc1fbdce324a1e7aece88fc1b812da0c8371cc39cc"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "51760a5b378f5d84f9adaf30d40635373d1f090e80d3843f0be3cbbc889952e9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "180a8c5c5df8c3aef45bc0631af274684fc383f6560e253e624bbb6a791e8544"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "4bcef72d6770136ae8bf743ecf1675eb67c2b6b71abe1de793d3834d608070a0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "6cda902be597fd456ce04c8fd8997b011704470351821ccf725c26516c5f9209"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "f7712ecb68edafbfdfeb66f4311f8c889ad1df747857d880d88eb22934c6c344"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "384a8fadee4c976208864edb2a133a1224e8a1b0eb5eaf93ee8a447233da7f49"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "58e1c609b4727534a7d98139cc12df64044531b77943b939eeda768ab3b113e5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191080, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "addf64287a616c874f51909851aee2a8849dfaa2a8c86cd208ff2d5d16525b2e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 178280, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "6e8d63eadd46eddc4743c18ce76f17bc100fe217820a1dabe347030ed325bbe5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "db08d142a1c7d51230a55c939922114adda7b471bb23c4238be9ab3a33171b75"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "e1bb5f7689ae182149e42c1e2da2132eb2f0bc4b7dcef246e96e8843f05da0b4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "36f00c64ef1ca19e058a4cce0888f26203edb87f375258133b6e2384b98241ad"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "5d9e00c3fb31eef9a791ecd7cc2b63d5ec2cea7c6b2908d523b17039c48b7765"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "8878e77fe0ba602aa6e2cfe84ccdea2cfaa84a755ba3461e26c5a3b37a58aad1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157792, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "acdc7ff2c5cff019ae96bb459e37933809bb9da29f0ae36ca1ac6d058b3d3fd7"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "b829c504878765130daed599464a50b303e7bcb5a4c40b9162eeb286802c530a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 144992, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "b39e3cea488c40491d7be017ad7a9976616f88895d2faf358d801a35a24cc9f5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "55c0d6ed6de44d1510610cbf2a53007a46401bae5eb18964e8ef1627dc904649"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "85c72b714b43cdbf17b2610929564b7bcef08bb4f5692e8eb17094a14bed432a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "cb89287132dc7bd0353bebccbeddd889babd194a4bad071a96eee2f7da2f27c9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "216eebe7e7bd1baf750824db1c7815efb847b0ff9c51ce636c49240a460f642f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "6d92c4d6d81760065d205d4824d68484854b2278cf4c02aadfc570e49055a10d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "c63965c39e0abbc53cca397dfba076d8878bd85d25fe6cf5ba35326151a98a90"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "8259df749afcdf1d4f0d27bd6729ff9498101d68ef6802655b6087aa9c9186d2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "c41787b1cd724191c72225587693d11b20598895add6925bf77cb42c11cdbbc8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "37dca30ce670595fcf0428969519f35b8943e1322ffcd582d09df0fb5412e441"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "657ada743c35b82c499f315b19418b71cf2b305047afe43f216ac734aea9d27a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "854aa9af22848686303d12aa11f0f61c810db4282377a50cd5f343b52babcda6"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "f58411561cdbcbc213ab817c31718cc6c2381614817dffeb92af84f3e6755098"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "1ad1d63857709ef956dc8615cb747017da7529413bbd15f62ae1caf3b02b2484"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "cc32f7a0c773310192d83775ea1187b0649cd2fa579722cbe81272785f0cd587"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "a127689d9a4414903349fb0b22912fa3a90de1446c5420400471e5e519244f35"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "37f86292d901de5328577204902f0d8c75dfb09961dea18747ad6e30c54e1395"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "61e3594782fdae5ff2b482b5bed8301e819e0537aee1fd6a848372a5cc7ca399"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "3d40d1142341892ee893577478c38a308ff5d77541307a0cbfbf95fcce6f3f4c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "ff5512e8447f173f50014bdb495a2debdb72fb1cd3674152c8cb3ff1d671dd9a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "62c13efdedde000470de56c9d37751c6e6aba61140ca3bead902ba1009dd8f69"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 199864, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "14a9290a7c2e8cceb4c95df39ea64d6ff1b6db534fac4e9bd27433b255100bb9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 199880, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, true, "f2949d4783fb1148f2398204f8e4df0ca1125ffa811ed5514180fa0e012ea599"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "5a48906d4be80ac1960e94d0cf600b6eaee394ec92b1b3056a8b84b6c6373376"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, true, "9c2e654e51b86ff876deb2abe6148edb7a65b6b4c81cedc7ccf09100fd23ccfa"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "63f3c7801b4bfe45b5d34b12fd1bcad744bbdc2b56a2cbd6f3db9023dda783af"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "a3acbeaf65b8f6ebc5c52b86b3b1636bd5503eea9864174dfbf8da126de4b2b8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, true, "4a8475393712bfbb662784f15d80214a5d04fee9e576e3fcc2531973659cbce7"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, true, "95f86b436d872cb6a1fe630c5e0344f9c9e3073cb853bebd56e4e00a08045a96"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 199864, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "3f1d0b44c62e2561eb60ca0dd9b565f95ca880afdc470297a6371e6524b4cc09"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 199880, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, true, "5e5e4b8d399702bae5fe120eb7c99ae0d15840cfcfbec887a5222aaabcf014fb"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "faba2c06fdcb0fb8ae5c47800f079aaa5e81edaf37213756e8f0eed2df31f90c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165056, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, true, "03b512cdbc8aafea51c2d7dbef46a62efdb57470a692b2bd0dac8ceb94fef240"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "2b50b889a120810eb37572dad54802fea4e7666003a9ed2bc99ca2690dfa4c8e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "ee2c3230fae5082b48e8f1a48826ec9d556b46e1932a2340d625bde4bc2d9250"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128PersistentKeepsAbForGen", 181536, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, true, "596c4dec91b55998773caf411501b851a6b6dc764f4561cd31e30a146d9139ad"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqSkipsSoftmaxQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, true, "ad9e616be1f756f48a66464dec85516169a5a3e423cfba0b0ec2ca155feead8e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196792, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "82638f163ae5737e2494b60f5e76cfdb88fc49833e6712be123a7a014c00790e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190136, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "5db72664a2cc7152c0e8e999e442ceb8120dc30155ba5ea3379a8b675d71064b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 195848, 384, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, true, "118e4ed1e1fd01241f3fb53c897d8a7c6cac269484c301ea532f71e2f82f26b3"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 190088, 384, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, true, "d7923c3acc922f1429455c3419dc4adbbbd0df0e6718481b9dc028c2500e28c1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "af916992cacbd2a188b68db2c9d07fc2dfe9164e0bd38e5f71ddc35e49f410a2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "2899e04438612b6f814b10b26a74013c018eacba22954f10790c9f882352f2d0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, true, "693777d41bca8485578fc4c5c38e2186ba3307bd1c7bb67ce894ebbb69fb96fc"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, true, "cd0cb12c43149e2dd9850e8faf47dbbc3354bc3ebd8ae52f0c6efb6295cadbb8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "f7dca11f132f8777b495c5557b8610845ff3a170265f3a4d89a2d400b11b609d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "f42d280dc7be3edeaba385b4cc2bdb678b20546e1f9b106c1bc344872d24f977"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "a21c90bc9489861b740ef80a63009988d73d9fc8194069fca5d12f3974cf7777"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "50c8db38cd4f938e3a4755287571cd26101f4cbd47b09bc55553e6eb72b18f96"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "98a0f044276ece8b3fc28b452d38882a2e5c742a9b00adc9d67a9143af931fca"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "48de2096dc7d573f899d25682c045612b17067d8e4cd98016dfd2c36ef2439e8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "bcfd322bb58a6115617df0958b8791fc039e3e5406bb78082d292e075849cdf0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "2f93f1b18ff09c56c66c27a4cb2a64cad6d2797cf6989cc24ecbe2b75bcee66f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163168, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, true, "16892a8e7b1a54e880942bd1d14062280bfdef265958a64741cbd8f1be5c53bc"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, true, "73375e88fc67ff022d91ddc8b85944e16007bcf3e3725f93941c7b48bb6a6e0d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156384, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, true, "4c879ba6d03b76097593d65adaa5945c6610b3800bb32e9b402337dfe275618a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, true, "3e8c0e3eb4386368d904ae0eb0debb0a2e6d8115f8c3294583fe1e313a9df05a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196792, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "e5690650fa22a592e6eb69cd6d330024a9431a83e1c74e590a23c0abe152f875"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190136, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "5cdcef082ee209c747927c2e354489c94352095e66f4035bfc0703fe43ce8d6d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 195848, 384, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, true, "a244dd7c96dc6b535b62bdc0807e3d4488e060a66196f8d8fe231149da46d684"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 190088, 384, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, true, "94df29f7010eb1a881f4c5d4c0df5961714665d1ed790bf6bcde8ee257754729"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "0c000612e7198ea264612ed2f9a160631037f92b3b442b70be50c6e1406d906e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "b7b8c788b527285813512c780f10b92046afcc0914409e30c88005c140ba649d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, true, "52ae941250f4708595ff2a55b95582df94deb5b00912112183c48891b15b0e61"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, true, "054cb930d271f0711ac3cba378422417a8277c94092423843df52849976ef127"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "1783aaf7621bfd28b79edd47fdd5f192acc3977371a40ab3012efead5bf578d2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "33d7b972971070fbcdfc0ca0254c49ca67d6d83f001f56b0db37d1374385d0af"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "ab010a19e81e806a7328c21160de51d05d392536f8e2d7a6289ec086ef9f20ca"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "53d4e9574364fe2babfa37e0ea7848b20efa575b4fc0afb5635e584cf58db5e2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "c33106088967d4ef370b04fa1a822f172090abc6e81537492062dc6d1dbb3456"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "789c1fe8863f761f81c1b57e7c1c0b47f3cb853872b9cc1096e0f18406c2431b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "14101fd90df01b02e71f34084d96a1ecf78b3626da80c2ed93dffab08cf6268f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "8aec7a6b8285850483499089847141d57d464c7fb1651377ea7dae2ee094cd3f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163168, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, true, "d291ad2347a6c4e2f9e9ae824ac6278b9bb2a67638aec134de9555ae8a5fddb8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, true, "c8d72c11a1b5e88e24de75de4003d40ab5f3d3310d5738949144f7acc7c84f97"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156384, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, true, "0fd341bcdc90cef390b2b39c288a6a0218249f4bcdc11227300f7e6343383ba1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, true, "5314beab2fe8464ec7a1d32c66a527beaabeb299095f20b70346347cb37042a0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196792, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "1cbd6c06c8c1f87797921ee200f10624bac4abfad8cf44e6917926f371ea41e6"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190136, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "4f3e80405eb6d11779e8050c6b690979fc9aa486dad2e08f0770a9c31f4e7e53"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 195848, 384, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, true, "dde14dd08f3d48eea3c7ff1fb0941ce53a68abefdb1049257699e131e1677b74"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 190088, 384, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, true, "2165bb96032f7d13a32c059c50587e0fd27812ad10addb200b268458ef6a433d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "eff353bd5f5985df1701be73b2c397c088f339bd12be465a0ec633abf455fe5c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "853b30a5eaab5a73563dd3dbb9976098017940bd987a521d4585c4b2c5bc5165"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, true, "203bb06d6f38a01fc9b6d1dacd9bb9b029a6f4d5ec1e330fd8989cbd40b18582"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, true, "03e0d5cae7ba5aa15f2913dca0fc7485046bd7765fe6c76798a4341b50d01807"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "dc3f595ec43397d0abd3d2fe96ed11accea25d14c4cfd3cfa5f5f62384661b3a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "abcc500746d08c68939cc2b14698070eb7c81f5c3d666967a515a3ebcee27912"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "47181ca78549eaf5af8fe4eaa895cdba825525415611b3c7603bfed6b6eea0b5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "1839d8df6d3d97854d30bb2112d49e9a7dad81d453a326cde1df2b8ef93faa61"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "b5a727f386b9f26a78078e495f079d4d10c6488bf22e12d707682338b7377c77"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "9bae2b5f9da83a9bfd38de6f49add34a7a2d7bf3b61f991dabbe60203961b2c5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "00d5e0acc213de35b86f8c8e452035ef62551e9cec709fb7a0e829ed40100ab1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "1c05478d8ca011c847dbaf778359580b679decffc2fadf3a74a0279f4f5f9c4c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163168, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, true, "65cf6b73f00bdbd7e7c303fa13f331ddaf5c912cd32c38bf4e2709f0eb9e8976"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, true, "a3065a43103272f4f7ca64163a7f3445f56c9265aac0471f78702753d3ff21a6"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156384, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, true, "c0f6f9bb29986bb48a591ffcea53e3fca80c9a64487490c710a243984027dd72"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, true, "d2f8ac738e8bd7ba7df7c344ee7dd868d2a43d9fdce05dabfa4aeacc2afb130c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196792, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "c83eb4c49b7bc1093c1faefc2274d528b9ed4a5edbce4a449e2cdc2a2a78a7a8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190136, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "2ceb6d5a2f16b4a7055f7cc58358fe0bf371cc2a0dbee2451d2d3be7d8cdb3d0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 195848, 384, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, true, "d38bb1c8995525286ace5ea1b5199535e70a41fc84080053612dada9774b0955"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 190088, 384, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, true, "ad3391bf469a81622ad73b7ec60d1e5850152181461dd5d38551ec12d4a95486"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "b83306dfaa58196a154304a3cb7430beaabee006eac5789f75e4a9c8a5d70409"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "8264fc71d301871751f154edef4fe243c58a3c08b95a7187b89514fe47845fc3"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, true, "2bf873c4cbac4e18c92b669d845c42beefaf97044c104a1c9bb39514e34f07db"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, true, "0619ab909772e19319946326853360c38bb87df51ba9a7d3886268f9c7fd4407"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "875e62664958fbed50f6e0db81a675cab971a3b3ff0dceab01b2c37c1153f2df"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "5f1ceca28d4cc8b54b074b48208e9d326e60b7a43f0131b023eb7d7351d9c86a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "36c94f5424b079f98f3d5e363661012545128a4b6aa976fa043929bdc4a4804f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 161968, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "ad54d4ebb65642e4e4c2a63412337e182023a5fd360fa67f0773a4e38df70f8c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "b7d59702a824ab8c8bf126ac8006113a03f351c96a2a0cfff8dba1b84cf6995e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155312, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "1693090cfea12eccee50aff6be18cc694755aa4a7d84ba8912da3331b657887e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "c4c89c5e0f6503626fb5444e001007624cf3268bed337e6a57d4f3d940ed2e72"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "45a20365e4abd5d967c4c3b5f1989649f2ea3795fa92ebd49170b06285bed5ab"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128PersistentSwapsAbForGen", 163168, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, true, "69d732f8d357bcc81e59a7eaddde777aaec540deafd4a280d6f8f75df733836d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ16Kv128StaticSwapsAbForGen", 161024, 384, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, true, "fbefb2b83181ea3d426e7ea5d972c74077247e22147d58897363dc9eb2bb43fa"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128PersistentSwapsAbForGen", 156384, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, true, "6ff30f98bcdc02b4e3a80adf9418a8f66808fb16a072fb15500b6e9545e0383f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ8Kv128StaticSwapsAbForGen", 155264, 384, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, true, "0078facfc71f39d9b5c080ee57ff97cfa5c4eace80bdaf6d68994d3c71d22a01"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "d700385689e7195c288779f084d6711238f5ae3096e150287fd9364cce41a0bb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "8911e7126b8fd5693588fa86a6614e051c5b24eb1c7687058f3a83623a27e1d5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "d6e1e787e4b336768a8423ae1a006f887ae2d4d1cb55e7381899128004de884b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "ab5c23c0abda55a7ff8f26f73474a1f42746ded481736a7aa2fc956032f95c13"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200808, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "47f1deae19a4d971b87e2989ca85cfebc30329f80fc458b857d9d8f1160c2b93"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "3ab5f7042318dcb29a7c652e3262be5c1e3244fa8398a57979c99b6dbe8e1807"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "d6dcff6df7de56e85d74ecde79da7e183bef6afe3018fcf6709b1a3cdffaf4c2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "fd5a35268c9a8a523e4743ff82143a514e6cb960851d0c63ddda82c719f79f61"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "0207075459f2068a03a3c5fbfeab96967469f5c23e94864b8c7a04ba53ef5180"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "7370890698ee7c13a42edf14d8b2a4cc74031053b81611fc59238c91c67892b0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169152, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "507678da74672ae0e30cc7f52f5bee301427bb90faa1bd4cebcdab10d6a0f1f4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "e1b81b963075b5bdfd44d54001736513156bb05cb03c20e68a1575f6c2b76c4c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "f234810f9df29c6ba9027438fd245050a646bc496c41e98a2f823f8c4352a8ea"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "00a5dd8b9c46897ca6f0fb5b73c738a3a45f736332ba1035a5df02221093c81b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200808, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "86b4f6dee2283c34125014d497f914e0116de68f5f0b9228c54520d999b81de8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "06a2bbab95908e18ba6da1080aa3bbe7e8a1900f74b674537c6582a103f2b92d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "2d166b2ced3d4d5ade65dcf2d0ffd217889745bfabd00fe5e03c4bf375bc3380"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "1e9992728d2c24f912f28ccb8a05b88e307b31faddab46af286fa3d1fff29b05"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "d0009e2f907958254ba00bb06e2b3b79f980471c7365d2dd06f5a693b71916dc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "f0a5eed7048665edaa630fb6a76a4f3124b39212cef9faa4506636829aff9bc6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169152, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "e2afa313c62a83fa78d596cd2cee41f89d266cb7c6ef759ca5208ca151b518d3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "6d85e16a5b20a6b4c69a224d55b65f8d1f8724558df3fa54f27fad6bfbf06913"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "c5a64b331d219bf4ef99ed18ca22c34dc733379ad35d12ef8d4db591d3ead3b6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "6d088c61f6da333b15b0cf77ff7036e355b05ddc32d73296f93bd85fbf4543dd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200808, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "e89f295795a858a8ea4e93149bebf94ed3772dd94565efc7ba3a6a8d6e05d770"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "9a2894a26d9a4c6f5e22ad35c25d9143fc03c42e4cc4d636aa94b7e48d5e65cb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "2632a1d8a1b3d2fa52e689b64a6e7a6c3a564dd25f6973df9b0b5a1fee2cd466"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "2fd109025079a83a990233490a3867c990b5e2121af7def96f3f6ffc314697d4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "4c48c2b5205b27728f5dc2df2c4dfd37182cb8f84af6ded6a1f7d4bbf14b73e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "3e2fd7999264b9aa2ed3025a5e2916d5e55ac05de61881cffcd0505ae1804f82"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169152, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "8c907caa54dad32c234dd7089f141e4fc21fa924e80e091bf46c942ffb5519a7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "fe9a6bf45ff7a2c2c88c848cb5e0f89c1ced9712d434c5f2d3390fa743a463f3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "e504d7ff279b2b213c4def4d3a818c7baa7d25b7dfe06e27d9894d1128d8ae02"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "78b03f4977c462fa5be55cb11e1874a35e7cca113fcd53ec0a1608f1ba3f5f4b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200808, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "6e94a836a9ba0e605bcdff2b3bd7e4b0dfba02a83c6a188b8e77e84a64f93bbe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "0a1b17b9f308241a82d19b52be703decf933b61144c4256e834b452df3d02c16"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "92d349c7bb5dc68d2a1caecde80ac74d8a9e309ffa6b9d0634310e4c215cd907"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "898c7cb59bab3a31842c620845861d3721b3ec98c434f5417f24fcb17c2ee4e8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "53ab72071a1a49865e14831b586ac931cbcc788a529062cb8e3eefd93f26979f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "4a92681e67f0a8ef9ab3f48128a257aedea51e792a9b0caf0fade99bcc37a22f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169152, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "2480c1587f3133517461bf17a9c62c2c5b5525638e9ce73759486065156ee7b8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 167008, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "74c1e5d7f33dc36b40d330620360c1978aed217fed8c9b84bbd8a66e95a66dd5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "2b7c424a77fcfb4a5477657813632bccfd23e8d1a5fcc7a7b41297e312219f91"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "d8ef5515130ef8005a4c9524b97d550b13818030b9e44c293a2c9f62e4a9df70"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "4693c346a5306fd65c4a78efcee765379af06a767eba5ef9a6f300c24537d22e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "44eac2b0de06aead7916b8ce1829e9eb666b605cf883f687aecd8024cc1494d3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "27174c197da81162d2c838f289e6a7d4ba28b0e21573d0372032302a53e6a97b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "58758aa62cb87f7d33e5760c3f4b75c5e10acdc5d35a43f2954d2543e0001d6f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213608, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "d2519c207ef6ee7e8264133126d22cbcea1d74a0fac83648d95f8b967a4e39ac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "e1defdbcf51ec609607fcd5e7a02d602173d6f0d8d0485d31f933a7e7d1ef71e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "9c1c8375a47c492d5facb55799f7db5ea9bbdf6edd5f892c38dc6f5b35561e6c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "a3ef4e4ea8be00bfd4da15d73ff8d18fb325a272c1226aa89b0d2f8072f6e5ec"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "222136b7b5d6ec5640f3ae3410e240ac1f72df980c88b14165b07bd1ad6a1f69"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "e0861adfb133b7e012285451adfc25bd47104db643e4943aff15c5ff7aab14b8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "19b21b4569628767dc000d0487d17c0d9b7ba6a1e324832dc123b309f489fc24"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "533f737ca52082876b17a1b29691aedd8b4cca3606b6b9740d37c920742ac49e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "469c947a6b2e43242df615e23b8d332da7b4cee12c71ac64036aa8223909068a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "2303cb9b7f035c88deaa6b2b9137e0a1c19bd282d58748993d154904b4f00cf4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213608, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "93543bf708c2ec6de31adc08e08341647e30e2a2940da0a1553eaf959a78fc43"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "aa43475feca0e30665981564604e8e2a2330722308cdb8920d96f8b1e4cfdb0b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "22941e3694cddb7b98079e75be25d476ca6d2520af07dd381427d0f345d2d44d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "572b1b0b8c876989a80e23e1ef6a46dccdf49bd06aee60e4e0b706554335e6d6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "aafbaa4057a92155655f46c76b74edd73a15ce205af5f64da267c33f0e755d8b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "e3a861dfa0ba2f2f24b5a2fb23f02058a3f96c929d5f49850c801bd9e57fbc38"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "78f8d3599ae5b2b5333a0ff9cfd297be9c989496cc8907e544846d927e6d30ae"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "c3fb51c5c3f79be79caf736039417619c0aaf34a51e3889902b0506cf30128c3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "e1a9e2629047c35bc5086d20f0c91329407f8a37307ef86bc8ba3d1c92cc5178"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "3ae4d507d64c7328bfd0491f4c59768dc1c4eb61b76aec28c9f4fb49b6df863b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213608, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "fdb0d6c6de579bda87ded544a0a368fb5deddd9c18f54cc4b0360b0af5463eec"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "88b38f9f50dad63719b9d9f748bf06ed00f7719445e34253f0e405659e202b69"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "45b1f62b4425415538b0f29950b7c2ec407e30254b0ee2c5f1a286dee630bd37"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "56b2d6a86cbdc0b6b97d1fbdb36d0c213981a8ceaa989f8957a437018f4b82e2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "78738db402a0a06b9c72a6b806efbd94f067605134e02040afb9f841a9fff38a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "06b122b9471188950ac550bab4210a441bd4d634e3213e96df368a8859abb3d9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "1c8fc7f4d350d5d4f01a8f5be17ae88aa9166375a03ca9cacacf139bd2c826dc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "c35441e4629e9c8dee8a8760fa5ae10a06a46bdfcd6c02f0716c4d27b3d23d86"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "423bf00e2d54260046c2d21161c1cb8c579a570ec365f0229e480f745e60abd9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "971e4d053c782efd5e94ce7ed1d9fc0ed660223457b71b9c830aeb5e42891923"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213608, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "3917237c1fda6e74f5a7cb51e9119697486e97f0e82c2e38a1c4d442e8304fe6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "59c6162cf7728b086efc3c0028bb2147593659c539d11899bf1d345d8e80db13"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "3dfd3e399561f9d7a38dc36d031f324211151549970ad057de5a0f717475e383"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "f62d67d28812f08452ef527d6a61bab45ad4913180a5d95748f7d126a5f13c61"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "f3cc8864eb309b2549a89fbe90c348cd8323379cd6b4c27ca68e59748c7d7d1f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "e2c119a2dc14c33f70594a724d1a219a8d1be96e58f40b9a4698bf6aecbe51fd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "16d9f438107758a4b4d01c86c6752de7467e61362bf33d0951a789be9de0147e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 180320, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "b11e78ea070542c68140f541f8ed3676f33f90d025ddc8c26d5a0b8b5d493dac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "1cbfdba2b05d685e01ee58c809f405d5f736a527fc4fe73d059ab9830e31b87c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "e88db3af4cf65b7fd25abfbab872a682867f45602385f99d7833f89f565e9ce1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "ba9535c39900128be48c48d9b3572b2d9b4be1a48c0374f77cb2d75b2fe96a3b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "e9901d71eba3956dfd2975c7f09e720eaa773622b5ec1e9d1792fe209756ea05"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "8a56d5b12a41b766cf617aeaf7e996c8ff39dcef4bfbbc81c5cfabbf159cb85f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "5c7c3a8d3847aaff4ed0f3c37162a1da81b276aa6f5d9a88e65808461435c963"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "7f710490e5b6d762f5e1ae0c70a37f89f4286e6023e63d15f65f96e5162270e8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "4fbb3280a289119ef0e97903f71e82d91aa68d0a34656ed5668e86dc0ab4c3ff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "a786c16c4c4856050dcfb857d85eb907c212b514c40c39e9a355837e7845c821"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "2a4c6476ae7d0d617cb333b7bc41dacbd17f964831ec5ef925378145c60f81bc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "ba9b6b4da11591f1474fb46f369de5d8883228f009d6e4a9a4da41fd9488a4cf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "d0e27424730ee0683c75bcdcac46eed3e5d6749d04f1d1e819ae6187b3829a50"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "8296ef1d60c3be7acafd94418af2bf592c3e80eb6e6857fee32f6223ed38a2d6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "490290be755571e06a7a195fe79ee44c49cdd0dfe047d4b714b0ae971d6b3c96"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "e06a02401bab9746a26aab8e0c208b6c091253507e757debe0e313edc199afbd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "50a03d4469064915095810a483f1623a1f736f88dd929ed886d8384a2609828a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "6c1b30352148b8a1c66855ab30baa45e833e3241fbdc7bb1be85776d3d9d80d3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "2ede616c09407ec387395e271eb8282ecd9ce3bc5f325644d62198d37c55d035"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "702b223f9394b0d831f7b63ba9c7540f7364e7d4108bc42a72b9d600513eadba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "6458369a9c9a522ab2fc1d79d685ee8d56b89bf26cdb066013230f7787f6afa4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "da3ace0c87e2fe4819fc53b04709aed7f7c4240ecef8836e05b776e90ef46439"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "abd1bb8de8a61ff7eb17b04ae9ac124e3b3b67a31c3a19ba928857acee28d4ee"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "64dbd677c51f870011f02691633ab30ba16ce95d578e22a340502ee2ee30658f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "2fd2aab63c3076cfac65b2af488338cceb780270da1f2819a8955381d9eed3b2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "7b5a05c496f26fbaa247b9db1bc275f3388dc376b2a1212b84231ef7733474fb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "f527e85163791bf8075ea4ebfd14390a6b88ad82dce9699e14dee527f5a73e7c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "ef000bed4143ad8d7703496ebf1c53eb2b2d246b161e0767b057b3d89d0d1328"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "95760ffbf77e5e973d5a3bd4b5815dd470b1e83adc3220b8d1a8e4e2a5ad3d5f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "049aeeac142174afd1f5525dd270573bc04873e053f7c9a828ee31725420e20d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "3ccec8d275ada77fefbf6b933b04a6812e9edf34ec385aa81543adb4b28d1e2f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "fdb7d8c2c3a275b53343861e4c13c74febf654902f9eb8816d38b6d909539013"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "c8cb4639777e577260c68935be938d6e9cb9402e0f2d2ee939484b99653ce62f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "fa055b1f2cc2ff7d4e3b98c919bfdf7050ed60f8cd85be58934c1316a029f605"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "1bbc4f8140a4d9e9b2975f532a07fd3d73c6fb6cd072b54655cd03584ff8c95c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "4567646164fb7fad0a7b4904ff8ab10e74b4def499a0258404d75b51459f8fe1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "43e7ebb594cfa2cb1e5009eec8e040d0502d95fc316fdc48a51522b3b7080150"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "d45b7be040595ae9792f11c29381a3b493b6bdc369c15cc69f3376f60e8f8791"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "f2f2255a0f3bd87ebe1669f51f41de28d464585d5c89d92add913b936c425543"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "b63524a14efa1b3f0aef25265e8bcadd9fec8f644539a49cd2bf539b83e8f564"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "342e011be9aabbf6e9d99f8dd78aa097c03e31cc1eebb49dffe86a66d2120b96"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "87c22d649f7440546cfecb142a8c95c03320b6004d98d793a4d16f022fce46ea"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "ab2c7a4efa62737543441193fa1bb2888cae9e3a4b2169dcd8716b1099430616"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "2e4e3832c8e477cf19d409801a3ba2679830ec9ccdb8c243f7b4600340b0d77a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "2e1405b0f6d1e025593edf3601c9dbc9c4dade10cec5aa69babaf7f8b097e5f9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "eb50d2e386aa0b3447fbef0d77856ac0571c8d622170ccc942e158b872dbfbed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "540a26ec158ed28b8d6ef3fac1b95ea4df2646b68d3c45384e5e3859bd2cedf6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "e11c84fb7c3bc330e809638393f308c733f4dc5f94c00d31390a500ba9c7483f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "f50a44e493d4a1c5e21955da41354a12fa5caa916c0424caa411244cb8df508b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "48baf6f9f6119da2a9b2dcf029c596952a7cea0499a0b2b88278c4a6f32d26ad"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "98e770c0a8efb9da58ff0a58dd5db4fd639aea13bb42873c9a914bc45211c726"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "a801a6a21516827a7a80f97e347adccbe8252346282ba7ff35800d65be69567a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "50a3ac5813d1f62da4f00bfb8b25102ed47970c119121528228896ead5db3583"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "c7d650e974b6a4373e8fb31514af0e29edefca64816b690d929bd4a09d451d63"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "8d835c524fb1d2220fc3e333675b698ac7f46d18247275d38193e0fdd70bd926"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "77d6e76e6138c0dc18bf7c579c0d6e1da14b307b0afbe6e33fac099f7403411e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "8ef47b75941383efc35588a109a0059f45056d8e533e2915c3e6e7d763f0695d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "83e784b4391863975720060c3b4f356ef1e8229cb445fd022d62a48908990a4d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "a8e724ba4205d63c13c9ef5b2e6589759505c4a39f194c828b5eb9a8333a6223"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "70e574180fcbb7bec8a8188958d56a0390b378ad3758e4338a5cf478c82e76cc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "9e74711337363fda48cc03a844863721b63ad13cc575422a66ab7ff537d21737"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "cc98aae765443cc0b36d9711c4f0ea6f03152bac4eafbafc865b272f35f4af97"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "f60db9f5dd79ede7a1d9f8730e0e583a74c59b6daff2041a1ea918aad0ab6c15"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "40647dcb9155ea3218a6c3ff6e7b296007cc792f45766b642c6ab7c51b45e034"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "51295d803adf2e9735e9dae9d57f47858a5c06ebee5e2d022ce8e53b49182964"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "ccef71e0e82b71a7d28d8f7435de395728f5a9a251197273fa43d9e55c7650ac"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "3a4672608d20d99634eae918f382256ffa953a6be882e1406f4d6178c3e2a7c8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "7a3336d8182bb5f9fdb468eb50efd3ec7af074762814ecf83c4e6ce650e553d9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "a20961349461c9364add20d9532bd184633ae0076082e4179032b63707d7e48a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "1b66b0cd93b234c340bd2581a6e662d08b52bce4bd04ec3e3740668d756a0ac7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "047db5262f26ce8c4dc5758e007328174272c788e23bbe4eaa86078ad94b3ad0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "daead4ab630adcdffd06e45f1ff0e551e8b088d12ff015238d4948098b3b0892"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "bb1bf4785d5d6b8829a375ed5488279a1df24edd1ae201de2c1f77aadf9af816"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "98c5ba86ef351f1dcb82898bc60430275b2057165f7cee4fe6af677abf420392"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "2f14ef1494e9074aaad26004472c75f968aa1549db464350b4a033a37aeed890"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "9c53fca7c41ff3b4c4b90a77a57679d7f0aa701c8edcbdfe88986eb00ca6f1a4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "30faa2b60bdf481eff4c0d7f32bc7c27b3da53bfcf9904d2af11442b655d1801"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "28d5cad87bf29f91216bd1f52c8ae30ff3b2576374fd3b8de5eb36312484a920"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "675bd9a66b7c30fd045dc352fba656a16adabc51ec0411b960f7114559b46d8a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "6565df444d30aaa0b8e3345389c110b2a0ca0026a2e539585bb833365d318050"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "b4ce4decf54a35b0fed8cb8ea833104deaa51b29a8e9e6cf58a4f808c7f9dd0b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "b97b880b78e6c32392209d63252ad9cf7449d67cbae90eeee3b1523479e1f778"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "19afab864d4371fe90ab3e46d052b9798f4b333797039fbc6bcfd11ff632433b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "81f1a073db2bff66bbee83c6941e1bdc38188b6865bca386ab8fe50f85343be0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "4b1f301dcfe7c39195f742a1fce00caa0dce459d6d2605ced98bf45d194edc30"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "7501eb4d1f5265ea5c0f80735aefa16ed27a61e079aee97b2c09d2cdf30e9e78"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "b219d9cbabcee185232a8bbf6507c5f2c0b0cedf5741616ed9a70547d1a13b7b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "12554300ff8106ad38f4ad34740cada89d32cbd077fa6995df3e15b989970382"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "559179d8fc69710e8aa71c2bce8dd10959275c950104aec074965ebdc18be191"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "6521350df73810c858fd76ae0ea93271c0a2aed1af9939c36c76d68770546dd5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "b1f35fa4f2d32f6750c76c515de8a70dbef3644d4a366ab0e10a346200e0087b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "1766f230e5bc8cd36578c7f3c7c6f1d1c596e33f7d54ba87f7ed693a1419c992"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "f41e512f580575f52a79b20474d9cf7ba2a4c359ae6cad815b1325e577697cd7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "5a585126f91e35ec58e09ca09a7519ca3186a30a2da03721115cea947800e5cf"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "997c37cce86a4b07119c240367c4439de846d6a0ee3e5b5657eb6bce158a3cab"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "3a57cdccb67e6df5c09dbc875b6fd282371e125aa0385466d387925d775ff262"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "81cb90671d4ddcc94acd3ea7aa12a34d8b147a4c8befa5f206bf648f53c4e8ba"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "9a431f5640798673fd79d5b22e498df0a0aba434036c74c130fb5729c64fb22f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "48398e1a8cc5441442f05b6198d4702a3284e9a89782f60124073a10d3d08c3f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "7b0343d6c4f6cdd5877c990964ebefbc0b4a574eb88f9a463c8178242e98013b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "74573b74b6a42dc2de6e60c231dc461d76685d6ae03f24a5e235b0cd5de58338"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "fe5b53018c43af3e8833c940e00cc516c57ea11b216be7c67746b0de4b5a16f1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "6221457f0da13b793a55b2ad14322a701781e65ec072a0892af6daa866a8c891"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "1067f7a6eb7a142758650656aa79e774a94d5152a417158e4054feb513c833f3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "76246812d865f9ef14104764fe1c689c711fd3e4f08b136f41ba8c87a66b0ebd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "e9a216480225d24c9005f5c016eb44fcbd3b098f7840f89e61927e9bdd02aa2e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "ba5ac4b274a1895c44a5ea703a57bce37aee27dc4857dcba7fb3495f2976b1bd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "46bff8ba7b0846e489e6c50e13de8a3b36fce7b7437af9dfab3fecefdaa5a140"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "f19efc749cc1e887342fef885b7b8e1f2cf758d1a2d47b2ef87b7c44572ee52c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "60a9cca80f683c6dc65c4962e3ef3d7a04d66ba1f4ec9bcd232bdf1c58cb3faa"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "d139eaff3e5fb615d1e2fbdd72026507bbdddd1fb298f8a2dc0fe5b2dc1522f0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "a39396ef23343d1d93ef137b417d68ab38c01d6363b33ebdddd1e249c06e2df9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "a684c3f6c83721909e1681511f0d3366eacd345e3c77a3c1da2ee526265bda93"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "1f5050736b8b99a13aeb0e20becd6d03fd49f8398094aa6bc5c4296f161b6d82"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "b60ea832c615fceb5e97fa8689242471894f63068ef6493469eea973e31622e5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "5bada656ebd29d6badd8431059a35327789bc18a3bc34b1ed7d25a3980151dc3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "9558df628eadb1e5f726dc51187c4939d1fbd5a7ceaf1edfa852c58180e65ee6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "f71d448d046b7a0a6782975d62152e0c785186f6c18e9a0a55cc341fdffaaa37"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "9c9ed37a473149be981cefc560e035a153194aa087942f1e5815f3431dd1257b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "9e134f2f0750a651c5e2847a1c1a6c3596ddc44c5d709b8f9aa08c375e32f3e5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "6f7b80079e306be3a07209ef6876a7f751919a6d1093a5a52e2a3150171d67b3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "530ab849efb5c1f75d6090faacbfb1eb72f3dabd7e58afab64807cacaa1da90f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "24a4b680b4613c2a70c1565afef8d8534462fad117177ece428f4ca3f7316861"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "de54e2ba4819f31c1a2809beb1b2f5e2c20b7dd80a5a5a50abb53756d83bebc0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "25faba3ee3260722c1b4e0294db44ed7ced1573eb28375702282e44f29b52b34"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "27270446b9f8e492cf18483d43f289fe011cf15aab4c574fea2c31ddc1a14351"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "651324ec152c6038b559054b4b649309bfdd48ba90a8294087f12b8bace3a609"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "51e3bcb3c8c0a1a56142312cfcbb596c67c7ed898b5f98cd72526684be8e0ca0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "25dc55cba01a4febf7090572daefb48f6ee80c11aaf8ef36beb94189c56f0d5e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "06937076232cae90b1ab72267a79c7995d7f941b993d73df1c99cdaabbfd243b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "fd1dfe049af1dd0db71344f2da9a8e77b0e3f5b2912a789ad6a5dbb69059ce8c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "af5b0fd67a58b5bb1d5bb5148301fc73190ac207e0e1e9f0156643b6957b1348"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "95863b9cff7369c0d6fc24b2608d86d6c96efcdc7d22a705502c868354e5d95e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "4a8897b341c74c4db1e9b25ec58cce861cfc1525c0259ee09a58d8c7c0354d91"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "a89b1d64feaf31a19d4f7e642424ef584c95573fb06495bd1557f90f71fb232e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "4615aa3af9dc346f0298cbd657c06659b8829fc91523ac4e908199267cd59967"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "c559b38947246a656c5fd305e06b22daa53ae3e4955b0d87b4caedf28fafade9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196976, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "d5731607b1fdaa070a5aa02c8ce93dea2ed18c63a0a77113ae5cef0f582e7541"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 196880, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "8b75c454484b900be9cdc45acd413563ef2b9417424362249c92b7f80eea37c1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196976, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "51c203a79e234addc60aec27cf37148e9c968760cf39e656a0673102bdf552f8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 196880, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "4cc47718a3bf86500632b3020731952d30c591362ba429a167a5115813634653"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "af2d5eba44197aa643cfde5fdd492340a3150f27c724ea143c8e53bc9b5b6f6e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197744, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "9e2ee2ad5fc73ba8691ffd061bf05bb1eab4b1a4d3c9312fd21442d31a36f312"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "082191a167c09988033ea31571563abb9fdbaa65178f7a603f093b791fec7570"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197744, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "8a6d4e938f7baa8585f062a679d740c3e93b625e03dee65fe9a420f096b3642f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "eedb34acf14b5ddb361195e1abf005072a1c38b64883f83004fc9ded27c77206"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197744, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "b0f6c6af2f97cd327d20e146814df9a643e3b4a5a9cfe89fd7c34273e9450098"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "4fe6c68a4b4b6c169014dac74040e381f8f5c306bb599d575937221b97314c04"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197744, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "6e2ac6e9d0d1ba61d4284be83efc586f737d66ddb3e8319409381f1ae3582124"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 196976, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "d95e880dddc1314e32d9a97223d3925b8436a420f8e98f72706c5a33a62528eb"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 196880, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "811bbb10efbf6e56f8c9bf56828e5d604c2870250afa988f571b35e7e24bdbde"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 196976, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "7a94a749e8e8ca25e1e33f27a2273cb34240c3a7174f0275246619d0e6a84159"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 196880, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "dff920c4aa85e9e4e0488281134871c54554e338b285cc26e0d76c05510aefdb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "54b40626e04583cfb2895bd8e92940626c4ccc91e391da723d9391deed9e13b0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "e833dfdc7f63bd6ff39adfc65fa6cd0bb38b4aa52570409fc941701490a55343"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "255d56f310a19e6472a964e8a54b28caae69ff666b1e9f52cc695fbec7425c79"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "aefee412a18d12f69741e3c473a4d7174dfd5ab47223b9d6de5867d6510b80f5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "e59af1d1cc66de3dc9684eb727b092a90d1727b9e7b81516b7a5f50bf2cbe6fe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "6d3c73f49f3453cc49e67124e6ccc88580694691acd8d4063f0273b5c4f2b87b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "dfc955ffef4587159f7244e1dc3168c9c017fdedcbbe9b541d81e9f9c3f7a986"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "09ae60975637dd988827052f300b308ffa0f7b7462460fa61e604958995f3e35"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "cc5fc38a4854520f3851b3c80a104e558e9115bd0f6688b89714dfc340eae659"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "1d90cdc291ef75a6c4951229ac35655947c292db6bba5e1f27cd7667ba8e7cf5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "bdd83ca545448b08812455f1ee2c4255b55e66910d11ce17a2f0daaaebe559de"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "95d5014743390b4130958770c4137ea79cb65928fede47af533a448d920fbcdd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "107792bb9c63452a98045d7d517288b1a31e50e9efc183979580d312fab19d0c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "65b04ef14aa3a97b9b91ad52307e709082e3862636630319633dc008b70e901a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "bc2cf3b65e99721434a044f04d0057de90a2e63d81f1f0a708447da1ed4f3a3b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "63b9f3d6efdaad2dd85bad177333550f9be0c642657bbc7bcdf6d6f4c968bf88"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "ed74e1de98ed44b476d1089831753181807cfa9be0bb21a242c8ccf2c4d90824"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "362d9aa28a22ba5833b0eee657c0fb3eb7abbf7040e39c5a881f624dc5a30d79"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "e6ea658b6b4a1e7db73951cf822257ff2e7ebd307f18a5d19077ad8fb881d00e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "e8ee24c32dbd06c7890e97b9dd51f17fe08ab85a41f238a3a2633cbd3725358f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "fd2c5060a709e653f3f16969651fcc8ce3a2badf126d7b7fab4e733f81eb67d5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "ae004592f395dcb70580c7862553eba87910ba556e2ab82b2d9b90ac8bdee9da"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "0845d1aa01dd1684bfddee0f3bff5414a17d9d10d128a5e9f616bb8c012d18d4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "89fb763262d38e3dccae5681d45668735605df0e5cd57333231eec4c7b7b7a6f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "5bafb7c667cf9dc67a41f0ce213b76456e3b75044e6abe3391510840194402dd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "8664891ee400299de4701dd088c176de0657bb672eb503d8605712a33280a65a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "27393b6ce9d902d4ac3164d5750011c9f5b974b8001953e30ac07fcb5e16115b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "1547f3da43a3489ebaae61b7c3268883e2f2f4be0f624e3773f13c289f945da1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "ec4b77114dfeacf5d0448a9eb848226ec0273b5e4f26cec13999148f3642ec98"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "0df4cbf85078ee0bb5191388932f0a2a87ea7ae62b1782bc2e48dd3b62770978"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "2229109325ae825156358a84342d3f2e1f87b27f98962aa2e4d4e32083732741"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "ce4d44cc09c613e8b2d290df9c08112ce4c68ec31c9ffab583807f5d17da3f6d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "714a3a6f63fe51df577039879792724e24f629389a978333d5926a966dca95f9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "172c9e11d20d4ccbed2a1ed8e0af169413cb435a6a7f2d15a9d62165ba254472"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "becf4ea569c1b89ed11830f729ff2fc244d37144bb2676e94dcdc98c4ca2b190"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "1d05158cfd3b24fc2aa53d4d96121003fb979b20207e0f2b8ce8db7549226947"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "d12e82a92509b221c032c01b6f630649a3a19e99439323188f200b77aebbcba9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "41b77beff97bfc42cd64cb0d04e5225382b861c1b114cac9ffe7286d1eba9677"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "ce0f13a193c74572d20f849531f4005b3240123b5ea450f43a0f766047acc7b3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "44b29c40d629026283caf5cf7210dd835c689d1c2d1411f521934fa6761604fb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "14319cbfb483fd0b919be05c54a91197a19d83af24b4e98a76e54a2e3b75265f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "890c1d2df83f9c8c3fc0e632f9935d77ee839e0763f2f7c73de3cc1bdfcd92f7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "17dcb5fa6fceef74e7bdfdfa80e609b500fd137465852cfc05b4ff8352738330"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "a1507332542849c789d4fc79c6c7a6e1f971db5484e2232c7262e8716abba9fc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "191d27f4ec9ba244c99c868e2a9482b939c2d0573210f258469d5e0b275e0f72"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "f8cc48ec5674d824f020f89d3bafbee3ce4e0c668380dee4432992659dc70323"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "320d623402aff6a7c8c206d7943a5648303c7a404febc7a7b845699898a33bea"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "b3ef09dbd061f392e7db7204924ffd01c53a270df2dda000305de0e29ded8fa6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "f40d6d36f068568ffb43b1d570af7eca32eccce70c1be56a1fad94171ce1afd0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "47e4bf11b7e0bfd9bb26a1e7fcf3715f122a5e23d734112973d357d4f6ada27b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "f5f04531fe21cf98b17570238471e5e41753c4f1acbdaff5c934db14604419d0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "6847365661023dd667f211f789b58d99fb0fb524124f3013e84ad21c67c14070"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "05e168b1bbfbe707e809519c6c9413eff9bd10df7bcc2f8cf53396369224f85b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "a95963fe8647c19b54c1f6df7cbc5faec4fbd09b1b97040a44ee018015b960ac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "b38b6e4ab4ddbfc86d83b458d4fe45781e7b070fb90dabb29c88db0575959413"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "113955b4e1eba186f19ad3dd28047d35734311317911f0fc5c81dfe5faed14de"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "08e8cfd9bdbbe4e9df34dfda2893177bdd5c92db96ec5d889e52a788ed0529bd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "5a9d747d27c5ecc9c3775ded2aa97b35ee6e9caa64c70016b10eda79ac482c3b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "6650ddf60278d3294864abf710882f3350dfd7675742ff1dee9e0d9b6288cb70"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "3ab9353950b1595d030364ab35c54fe44b8b09071a5d90ff4588093ff688cfce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "f35c52b7a296f5f87c7c6bd50c3c15cf43fe8e2069739156529f61caca2c5cce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "a03baeca1ddfeaf6b80f9bb7ffaf12047c56cfac4f1f5cf5c5362212f99e00f6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "4cfc4b2e7bec2efaae85999340526fcf51be960139076276f94e835f10b345f4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "f1d5e1e403e9dec565339aa53618c59456d9346aa8dbc6d0df69f7cf88f47db1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "c6d34826bd8358cbac0fa03bee79cde2236a7370a64435ef83053f2fe2abc6e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "caea5aef779600edd8e284b77492029b53c6c4c68b51b8acbe50e5ebe7b08e5f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "baa4d3e3ff615fc8065f2663be10c8f2caa34cc90a3c78a34318a6678655e231"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "8b5d9955cbd03c55565d01d5af59666dd38d968fe117b2f0958051c3a5ef4ce4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "bfb7f1e7f109d2c7e71ab39cb0f278209c5f1a9dc7b8a4f0e0f1fd5a7bbd2209"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "aa7a0d7b3da705d4d77f3470847f0df4fb52dd1d60b8f8921ec7c2794fd5587a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "a47611cd85db90cb0984847d5fe5f60fdbdce95a26b066b6773afd5236257acf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "ca2f6060dbaea42b28a4a64bf818e4a5163d417cf37f56fd3a1472e1ec578903"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "c61fa379b9a39d11daca0f840e5fd26194d4fcbe53ab9f2b88ffef5796f665ca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "a26c45ecd5308b3f49d89b408dc105d72a65af771588866d21a64231c01e96b0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "2354b1f4dbc4242698cee76609473b6ff25906249a246730efebacdcfcb798ca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "0964ebd87f409af63da04a955cda171ab467338cf1cb7d95af2885c488b64d2e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "b07ebb3702c99a3cf64d6c042703ec56733dde5764ab20bb1724ccdc5c3e463c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "ef5a478c5ccfc220649a4aca309de107e5e0f1fbd907770213cfdea8b1e51d14"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "d67b97f5a0d9ea6991fe8806eeae4e2892c5103706d613a8ff0b03a4e676ed8c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "3627bdf19ce399591e803e1e29dd091589c3bf0cd40dab1d707b10558a64a642"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "ad0191d7e4f801f783755c9e52754210d7d8e97b587009b2fd93757e539e4d28"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "9de6f1b3d72db6a50a0b27bb764b801f87932ea0f6919d93944729457d1db818"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "accd3a0d9157138ad1267df97be51e2adaa590b7df71b8ad63d8afe2a26a23a2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "4faa332d6d0395477e21b671da5d3d2b169dea0c11731e9c36e50f29981c0199"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "070ff2c594ee44340649ebf415b17d83cdebad354147a3dd21a8d3293db28f24"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "3152db2b6d9af577b7d17d1544700e8d5fbf081387717f9d3f54dadf0a415523"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "e1e227da11f59f2f0821f98c7d8a7f9e59d9f5ce7cc64fcac201968b72835bac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "969c908bc2680412ce56fd7c959261dbf6fb64b720c13067e28998ed43ae02d3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "f69cbaa87260670eb3b7b365b977e36e473642b8daa66c450cba8a85bf76aad7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "1f5d0880f7b219f1ea7d4874c37aee48defa512f42f6a53570409db5b57fcd72"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 115024, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "43d8737dc921e20c51522bbe59352db134639c435b6134eaffdcf66ce32996ba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 114928, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "6078892404dd9d55ca895962eb2a3d95f16b0c01fecda150ba251f3de682bd75"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 115024, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "b96e9ed7090a75facab0d80bc9590961d9aa46db3bc128d3e465b7f76dc6faf6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 114928, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "fc70a193042d314a24832817bc989a26d5986692b1cd7f059d582f013e20db9d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "dbc0d8528d8879a077fd65faf3ea7b984ec708f52c7483a77bdb06b22a725589"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 115792, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "bb774d67aaed6e27a19038dd0b2242923c4e745bf67aa6d33e7ef53a2ee89933"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "6e04e0087463b0e0104d43f9c2f402c7995111ad5b43a77a26c99cc1608cc84f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 115792, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "ac0a6e6287d51907a0d4e84f6b02944a097ba7a035056fcd656cbbe506da369d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "3bd1f1264f8b791c788a7051d4608298f3424fffe7c68e4fb08409110696f51d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 115792, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "09e68794e7bf8092f3294eed60cd7e1e77476b2760199692bb5d783361ef6edc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "c15a7397133a3450a89740e863f575124f7eb357d0dd14792afe8fc40e0fd090"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 115792, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "d51fc9bfe0c50552fdafdfa96a257b77b7c67cc7695826676d59c7ffce46ba41"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 115024, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "355493170795184f0c4d92f2989ebb96aebaa5e7b846968c3b51e8c790a1c289"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 114928, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "c34705fdbd0ccf050a6da572fc6dd5035b6b4017d22dd40c513602249c286e70"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 115024, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "cd62cb82f1bc812caf33351ba804cd5ba257660b925e6f7aea46c06a759d7839"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 114928, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "3982d8d05a25f9cfb79b8d96d483f1ef7f89304ceb4f176cf2b1c65610295a50"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "c40e6ebe2e0ed342c2dcb35de9d235b4dd2b3a76db73c3b33ffbfb3c3655d586"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "3fef0f7c587003167b4da1acb1a4612c7fba2c5e2f6724e88b5ad5ea57dc86bc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "6bc8d135a16edf54edd1889c109a604fe5ae36703aa3d36c53cdbb4a04c939c1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "0569004a518dc2ed739a36fcae1cb239ab6abbbf014a566067ca3db818b2aa26"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "370186ef9d7df94c74399e80a07878ef4872444a11933eba45a00d3fa7b7ed1d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "5466003b335b8bc94f48186391029fa7cc75ff01ad255faad2b39ee22db769c4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "c87c2dd085e48ecdbbcc38f8bc719c13634b565718e910c8454f4f4a7f91d141"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "494f3a587fb9892c77a4680bbb245f89f8c66d2df02dfeabb55ef3053e29584a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "6e2e852097109759563155c4940ed7a8a575c4a02a32dd09de699f71cf555504"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "762404969a3fe608c9ab4cd3c67c93c8ed673a0de815381dfe29b9801111c25a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "1e7d8c2a436ea63dc9a425d2cbe4ec0d937e6f35eae9790f222e24fcb3b53c0f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "804ccc6830fe3dd7b5e3f4ee6f31123e0511422913a0ca1902ea2da6b6e7b443"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "97ea03775e658dc515e77d66e4aac0aebd333ac2322c16ee460c58d3b61a8ff0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "520ddee5ab0b12e793b4b26db98f24b3a25ffd85cf313ab7f7a5464eb3ed3eb0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "42334783b0db0b3264217b9de15b168b7da8e0f3a23b12b261a62c3692713836"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "84d645c6e38590cc953d62c49e353a8a0e6f989e3dbf938df0bebe20944210aa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "760339ac9c0b9b249ccd231cbf517c2f4ed5398d5e6130da2d84157efa485e60"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "9e32e87f691193c76a9bd5e7bd66869cd33a4a3dc35ae4681ef86b61accb9e1b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "b6d5d6ad64269605617b449dca60db6a7b14ad01cc225e168e148354c73b302c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "874456613c1c3a05ef7bb1f64a2d136c58fc48fbe461810d9272ab9b1d59f7dc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "4f23ca3defcb86aa674d8ac08218263ffba1308a4a784c4cc3bbafa45c144853"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "170f8720f39a11ae249120df778b5b3729e246d61a6185dd5de2afbc2de5bbe6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "b91c976d7ca17fb4272abcb0bd3b465c140a8700c96a77c2014569fcd792ce48"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "c1a3848b1c31bbf42db46b7e8c57217059dd59b9db1325e285a110ea877f70f1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "be0f106b99091344881af8d44a9b5e7560c44c08fd212f86a18cdbe9a1c68eb0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "a33e7ea7b894fdf4627038e826def799a0731fbd6632e3c0cbf574fc1beef649"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "d8a7681e3c385f24ba4f521fdb3b83db4080735fe0920a0cb3c718158bb9a061"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "6db48f35ddf7c8a064c02bdad81e0c6f1bfba72e8e494c7a7302c79a2d10093b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "b26af1a85dc45e249126a0cc648d81cb961c0b2304143d9f76ff9dc55bbb61b8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "e3cd8931774fbde1d11f7544069b8e277cd01a4d454b0680d12139e7cb5dc595"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "e1b747ae4531dd21112349d6917439a9c0b5ef7d029503233a0ced70c83cbe3e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "221d51a33ba157c41b5921adcacd6bd83137152dff46a3802254e8dc2e8ecdd2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "943962d9c42516c7e6f9fce4d75ee19393437e263ca26544beb764ef200142e2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "8fc9ecf43f217ccfa607951825c6fb6f87f269350108a4f9fbd25d4743f4d44b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "335de5b93bdbf4875d560b54e2670e51dc2a8575573d4eea4e5559e5a3532ba2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "665de989446d5d2325428fee810f817c854cdee02843e219cdff216b1d19294c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "4b74a356c91190c8aa8c8afe5b5db678fb4001c3f5b7062164232508e4bd3642"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "9aaad8ead294c2267c507091c83ac99dc2794aebac567642ec1c774c12077d8c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "fc903d96106eeaee7f1aa9e6fac71222389bdac3b6a7003f848ca013547511fb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "eb39e9dcbb9eb3b625737e2a8aadc1a1755b3b636ee283b7e3a5e9beec3fcd29"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "0f664ba0f63e3a1ca0522164f8b3a38264eb9985b15682c8975b5c89c1d69678"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "2e889b147f20222ccf0bde4e581e4ddb278f4b86f46522cdb7b2d7ae829d9422"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "93f12d2d9e87934e8453d59b5518bbe14c809fc4cd966e9016a80eb8aae653c5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "ec6abe2c66451f36b317727a4cb32300f4fd4faa973e27cc8a7ef2cd1c7cb74e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "3060d1cefd647b47aa99c752c09a1ac57e9e1152bb3f9a3f1a36014206f8273b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "f54bf072d7cc436dae49af2c26dd0d8d12069184599dc098b9740d47a58deaa3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "f4dd38847c058fb759682533a584b80e53023167b62ed4e575a60e17e083fe27"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "777516caabefcaee7c27d6801718d37fdace022a4769221bdc264f6b3d503ab3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "ecc0cda22e7c65d2001aa9b63da0cf7be999fcb17c391cc2b6067b6ca8fbb1f7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "b21d38baaa6165fc8ac8a0b1af3120d2e62b3d5232dd18c7938c8f3457e53f59"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "c870db929d7f5177df53787e8e501d3a00c29c155ac5084d49960309b277a76f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "b94e7c2b92ebc1061a2f3d2d6e25c6aaac0f100fa82f9005a2fb4c82885cfa9d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "2f520b0d2a5f55e4d557bb48a38fcb94916ad965a5c2b596d98c62077c60837b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "e0897c2811db3e3575897e46a6af20104785b16550b19695781b1fa4fb36da67"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "d0cd6303416f796857ff396e92b6e325487786bbb68525434a67550a46c8d86c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "dda75b29b5c40d4b7b9b2c03dc114bed2eda1f58ebbb2bf57fe54fe9ad5aff07"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "8e6b8fe2d28f81f832b7ee8ef3f30c8324fd017ed8ac7ff350e3239c6792aa5b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "e7e9df069d6a8ac2042a96afc6fc6cd13365144bcd81b53f6a595ff6372a19b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "eeea1a8dad16ceabde1cb3230ddad97a22d549dd71dd9a2c709bbecca2a477d4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "f3af5dffb2af6e4f00e5c02c9c7807d0f3c66a15daca1f00ceb5c4c32b942868"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "638b3b0e356b6d3ea94942caae261ccd3bb92709b5e1fa526b888363d5605048"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "370b8756e56a60a1016e0dcee691ec7528ea367389ff0df396afa3c0080e0a37"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "4546c64df0e5134028a9a2f42669d665653fd90d0d868d8a13d649b7126f9688"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "866cb256c5944afff3f4e4032b8c5c37f7bc249fcf337f05aac5ff623862a0ed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "9987abc47656161214d032353a03d6066443dcff9ccb4fb0f36587ba4dddc7bc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "ebba941e715ccecd1d9db1cdd2ae4ed92576e6e80bb18240af4d91a5907a4320"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "63babef223c59a8c3ba09795701ca7b7a77e7f231c15a7ba5e2e8d870d3f192c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "3239e54b942bb259bb2b4363248f42b94d79ccf333530e800d3b8f971115dbd5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "b7f4f4ae3c4a92f2588ac88b07c1d1cfa59468c13c0ec4d3d30b66b8f7e0c0ab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "d56d10962c8331ba4decdda298ce0bb6b9aeb96697dc31558698c3cd105e9542"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "94b045d50e46f0785d70b921939ea07f93b032b0f822f037c7efd36408eff30c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "b5c88cfc2a83c22cff1d8bcecc2d0c3716f00d920a0aba15e623e1f9294c004c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "4f510514d31473a665671bf3580f7575684b265f3cc78de8c871bbe51b51b7fd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "50e04eb4c1045b5040ce1fc93b048078678d0e49beca245a76087952204aef43"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "7d54abfd203875cd818652d46fc88571f58e32e079f78d0f74652bd1c69d8028"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "c0d112ce6bfa1be950f9f9bd7dffeadf072462a750b42a9e4c97d920fac8a579"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "ff0f8e6a75229cd2a2f912c39c869e8b5e450b83f615567f703cb2b011970615"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "eef6c2192bd20c092b350380ae068491bbe9c879d5fb85343fa7b9586c35ed27"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "580303f173ffd6caeb3ef977b5684bc5a1cd63965f7c82b0beac5743d2b90e6c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "6505fe918097ec8719639392f01d59e021a11b467f402a3ecc52dd3c2d47354f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "67dfc3d81b4f6c7e3799b3809c529d8cbb7f192d712400d0385301a234b1d6a6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "07116a24e70964598a385ae9abb9e8558ea835206c5e95545296c1e125b24320"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "80a4e7dc39ae7246cd9ca0fe296491b432e12a1d46679c67f8afecefe15406c4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "8e1b08b6d31a7d88e3ab9832ecfb45b308646afbf70e73de190fbc1eff7d3423"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "1bbc7c02d6a4f17248a5a9cc3ac586a97289b1c4e41e7e1bffc83a400a3b363f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "75dde5065aa5482276f5f0a5fae2238a3dc100bb76c53cba817d9f2848c8f0a6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "ed98438c8f72bc72a8822fd2b5ba84266cc332480ed3a5132c5b8c261fd0fcc4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "e0639ecb6583c6763a9e42db4e5d5bec34717d31830c233671af5551840ef93a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "065f2458aaab11bf1f7ff2f1cc237d2f5e8b2e0a034447e06075cceb291bdfe0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "ca15691b9d60060e1cea98e0e345fd32c440bf6128421d67b29615c644d2314b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "7cbeb976f9f162b39344fb38609391580e2c42a0c30f43d966023fcc75c5ed33"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "e691dea100fb23017f79987d57d972c0492045ccc95b37589d5b68bf7f294980"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "ca6392cbe6fdb2fb1e3b0d3cb3fdefef2c09d7198226cb735c53d9cd39ac8097"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "b62ee6fe62ac102a5b3acf23b9599e05514c54d7a7b8db17ee7026f856a35c97"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "f78d5e6b375f6a460517c17f5c695c7c1110ebb3dacbfeb3ef623ed59fc43c7c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "ea588a64a990c32c444b41aae01afdbdb8295510381899b310e86d2a7092f290"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "3a0db41834a6abe3f6da0289b496cc092cf8b700dc61e2cbe0de7054e58f81e6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "666fde13dce393329b5dffc1b814f300fec16de9a126dcce6f9da577e6f94b98"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "712e56f7d776378727b526227b9e357d9189d1187c227278e923d21b5fa7ae08"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "c08ae1f6ac9269be2705c614f359e6891745a3aa42cdc9cabf0c08d3a75cd50e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "f143d448406cbf1291fef4a26415a99fb3625ab2a48b88fa870c2b6afd310b5a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "e0b4deeffdf4e84f33eab336f9f94d7ad6138947ead125abf070497dd35fd4be"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "6460d8605c2fb49334bf050641a5b97e26f7815827a5f891011e05d797177f28"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "04053dfc0e118fb7b6f531c3a96b8650af56e26215f0539df1c211f9a996333e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "281c17aef09a8c97d5d418727a34606dc8767a9af36afbb8012d51d655310d77"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "1e055ba112796267fc84213a27779742a422796bf5d5c5c8c42f1880e87f5a07"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "0eec6a8b12a64bb540b423056a8f3e1dd329066f69949e0d5611f6850525b46e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "6b5eeaee8924531dd6d6cca2d9d0712e589fb9d62c15753a51c77008ffa87ac6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "4d88d7e3d09d5ce29969039df7ae72a03c07d7581872744af1a753f33ca41126"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "6179806a97dfcc827d1b307e582bbbdb0ee589b1dc67ac156fd5ca30bdbb5163"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "247466699ec9d2c251712b3c997a591720fb75221c311f59212ba59135b3ea8c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "68621f3732e4559a464f2d895b22d896f242499c46c7c7d0a214ce67b241e207"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "ba12ffeb6cac67d2b83c6e3192645932af557a1576a419389d3e873f320895dc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "e3d724f3cf60c3c716e894dda46730395c8f1f108ec001e1c0783c540b5556a6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "21229ac3cfa330f339f7528c5804bde3147e8a720aa5caa6e7222eee5d64dd7e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "4bdbefc54c277ff9b9e78f4a8db31d2adb4af79499ef29707da12b6cf0e652cd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "d86ea17912d000885934270a796f2f6b1b01fcb0259b040149e7d0e3b340e02a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "0e555b7ffbde5d34da4e811ec42656ea785c23f166578b4bb0a10aa657ea8005"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "c32e5e9db7f23eea56da299fdd03f2a50b9c44bda9346dd752d879bc5ffb3194"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "dd9230897758d049775c1c14b41eaa211062e6471033c563a85e784a27525af7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "54338f3fe545838935c8ff837fb1bb05fefc914c9ee5ef03a0c914265492f956"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "a84b5f4e5a7a094c350ee9c24906772d944c7d431fcb1ef40d10137bc2719c9a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "be42785a389da847c2e16c988ff9febc9cbcd72369d10d253200c1bb4d2821f2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "14204bf16517de992e172c1529269eae6bfc2db9ff0d5d2644bb922404969708"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "ce7dbea299154d8eeb37c76d46967bee79008a28c6a06e922ccfd0a42e61fbde"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "f7cdc612f3c45d49bf3693b0285e17b2a8565ea725518c78ce3da25cac52e1f2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "9868ba622373e3272d60c52f3652603505a4325287191b81b86a01cc8350abe6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "b6d681006988cf44c11750bda9f6f5195d2f55044e3ffa3b65e6153e31cedd7a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "c46f75762c51044931384e2bf8ea63e5216c62734040252fb5f9443b764d66da"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "ccb483cf246e6c20a19d6235e86daa4a7fccfddad41beba42f3caa080d111f2e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "fa9a270be8163bbbe9ea05effa7fac5c3a64d9fcd076fbd864ea7c37204a1500"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "ff3af0052bddb3338716d7b57e0306134651c45920fff20e0ce2237a4c146947"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "11130186cc081e4e50984dee41587aab753346f402e66d141d5ea32a358c3029"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "86874188473e0f1300539953a844b47a9d3eb77f1ff32d7c674ae3065ddcf163"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "f6d413c0f71886fa04ab7573d30b3270b20c4d035a25fc8c7a5751bfde43869f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "cf59be854b38babb8c4e4b751df2c296a02036e25b168847abbb46736b1e6622"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "2bf7e297c1ac91058ce43b8e05a7c2327a15db07c1ac14d9f1ec580df6b58483"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "9d402df2e46ab62173f57401d29daa82c99719347183915a8c4019e507d914fd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "54fec03dc5f2c8c6cd679d1b89d38ad20e65fc3d90ee293b47ddfe65089d6264"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "1eebc8b62f6fc48a1f7831e1b348dbf421437aaa8cd3f9f5008a3cb168bfca87"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "744d66dc6ba57fcd23b785c353b2929aac6e45527f5b25b551c148a2a1ba77db"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "fce7678c060dde0eb9373773858db791e61e693ed250622fd432fc8a6c67013b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "557a1d5a17ff09e3a9d1bfc3e77fa8f80e36350dd7d75c1ff58198a10c6fea83"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "c94197a45305d918bfb16b7af6c1a90f8d28a12ce5119632a0570c436323ca1e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "290aa34e81234f0e3ca5508f7b5f22661389a7abcffb7be1ae98cbe984fdc2d0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "725d8ac5e8c33381dba6f9058b76fe6aadc5f556c7ff320b2b95ecdfd7d160c0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "a3311fe8149750edd7d0caf12e1a8da473a4affe885ffbb025b89115fae589e9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "6f7e01f5ea942705fd41f5f8da5dfeed9ba7b33d9cee2f8c0155326c46fb306e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "ce36b0f3e1725e416e3f88bfad9232bd11cce62b06d519ff29d07281a679e170"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "8e7fd9b6faace44567c2e6f977092a9fa98ea144d39120d75bcc4212bcf6522e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "523457986530afa1b0468a8809a4c6a7907527eb7585438dbf5d4997bf2832f0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "ae53af481946298e5a3a31eda499b34fc2e800a609026d12d2158d08cb464ef2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "44a515b6af2adea239ab2c509d99838079d0899bfa4888a18a233342509d8c50"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "f06fd74c91adfbb67b1837e382f3baffc7370805e0aedb2355443a13ddd5feef"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "2ed3ab55bce5324fc8a65ae2f3be56c5ba72597e6a2d4f593eb98d4727e66f6d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "4f3aebf6901c8b66db81b2acd8acbbcc7f61c336a84894777f6660f41c33e7f7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "96ee23ababf8edb2b02827ec0823154d7f71c1e33f8162ea4b9fd46f3732809f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "c7dbc5768eede59aadc2d4cdf5b82c613e2f1f8adbea5faad1737f174edff6bc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "6f1bf039da44cd344476de53785184cfce591d82b9f9a42632cd1ce484ddda6c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "b991415d6f052ec8208a513fff3245087e77c248de0d81bd6a86ecf0d44880db"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "8f6bfa78ee955b9a6fc709930c731d26edf00c27019dce67764d4ae2a48c7276"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "9d3f8e2afcfca20aed284299bd7944ac6eb33b52a8425b473636d105da85d3b0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "b5d1806c203fff0d96a2c4ad324f237d5ea7abaa2c0b872eefcedc22a758fea4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "e52589687047e24e6463f5b15926e48ae2669d5d5b77d171e7a93e4ca8332bda"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "dc022d03aa45b12e2a089313f58cab58d5f85cb0981cb190b12b2bb30e0c886c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "15e4da3da6dd11d9cca98d14266c508ef142fe8faa5bedc228a36207c8994e26"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "8231fb55e904d8d555b02c19f08f23d4934dd2f03bd80d72d31e17981b9a3f67"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "d9c0c05d6227bdf0343d38afae674ad3ef9967d43fef1d14a45ffad7614b7e87"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "4c6881d4ac253d82a0027728746a987a0baf00fbfc96133c8727d62e8569e488"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "e3724c65a4937c212eb6bd399fe3123f1790dffc3dd605a8f0e1f7fe80273cff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "11a5d94b9b81d18dfe5fe143e545e3c9435a810b22900710be03e25e5752a5ea"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "e11e0e46697fe10b539af15a9e5d6e274ac6f7eba36c6be4b7e918dd12ec0b66"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "0145c0265822b88406afdd6b50a2366e698d1a8a2e90d589ad70aa495510acd1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "2b03ee2a12e239a176436dd239f572220ed260e15fab12abafb53a2b1a310789"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "667fdbf60108cc5e04e5ba28b0ec3c3eb4b529001bdab89bf621d9178ef55c72"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "6ddf6fda81bcf06e4f166ad764840319ba283dd58547f39499b0c4d360cc72fd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "1efa6814e3deaf6bf762a50dc51cdfc3b4b0482a9c5b4c608d0c2ffaf3f63153"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "a0ff1d567e0cb25fdf143417cef5a744994c0dc49a91174258c296661d9da739"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "93980d553762431b0c874170e24685b4af1a0abc9a56709e96a77d57a2d46e04"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "9f9f7293907559249bbcde67758a4c2fe4f25db94fb2e44e9e9b2225a3a08103"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "cc60ab04d62802184d817823717ba50102f3819a881832b90f2d5ae2fe47223b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "a9b171626ddb2c24f43227b4e35a080e60dcd2ee0705a51594c147d1284b63a3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "7da0b4f376825d66b890405e7287018c4c1956d55a8b657842574e4d26a0b2d0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "3efea94d62fe74b6b577ebe6921440cc06dd2213de41f76ac2639b8bdcbf2cdb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "dcbd73555e59695e90f5abb7d0226cced2b719262c1cafbaaec2561f03066ba6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "ba311a0bffdce51b85536f17455860af9e7d1a901833b83b663e8b174293c0a8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "7c8d590edf46d58dd3596f87c1a8e4a16c91a2848b8d68972712d69e4078e3a7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "a47b5662dc8bd8cfc1dca8ca9922f89e17cf01c50d23b469cf8ef06e4bec7df7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "d34e8c5c51740961216ed05905895e6b44bc1e4910eea3fcd213c1c0105a3187"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "161662ddc8870b260a048bf5b54d9c2233693d4a6df6ace2ef37397a5d93e634"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "d752fd8f6319e0f9875c5dc6abfc94e2387499432160d671c1a6672e7969f98b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "18fe38d782da9ea2dc54f0344613c9a1ae35cda0665361e4ddb642dadd86bbb5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "d301c3949b488d848fe48c6cab8f3609dfba721a2e27da3c2ccd3e9204178a08"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "ebf4113b6757365a063faf347ab41dcfb8969b280d2aa956d19e26c839920748"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "3be2fc216cd7152394ea5aed6b031b8e4b93a5fb7bcbc2f6090d260be3a44973"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "20859bae4bfc260b10755991c266b984ff70b73a83c3c5fe143f7f0e0b407b15"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "0a7bd40a05130cda5c40e7b88ad916e2f35f63889c5106644f4d6abf1906e2ac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "59413b2ee0b5b3cf295c525d2d1e31cf2d0a4b50c782bf1396f377e787dd4805"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "e07c74cc856bf7e1751024f7f73dee093ff5ab044c1f5f5df664943bd6e46007"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "86212872cecd6e445d42839e1b2ab6a85192ad6fec9d4a4fb4bdb303da8c9493"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "cbbfe2fbd174c9a33884366925beb067389b7c56e456af93469c7828ed27f57d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "d2715d118b2d34275d69af1c2f4e60f3487ce1c70425332c796f1e8af55dfd82"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "16f4a30e4282f320f5a4d59089bf457ef4793dcad1a6c37b06795de6d9082f1e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "5cc0b2b7ae065b57c1e771dd3215eb7a6804847d3a109458703e74ddee3ba616"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "8348e5baed106aadbbeaadb6aee027796116ebde114eb6cbfb2ec4a152329626"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "81dfd633f24dd45bca8c13abdbf4e899a8d15e72b78a6fdb06fe315ad1b528bc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "c3f08c82388a37e88c2bdbfad016ddf9b2cff9d97692ab6cd9886fa4436a9639"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "5c128daab2ec5704e2624ff952637306c121c2c7ae9389609bb4454ed213b211"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "e1014ece4b89d75ad2f9439ba02d9fb130e88e554fa1bdbc36b09e1baaeaeb66"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "8a31fd150f72ccc39c18b08b52a0ebdcac63cdb629dd4d627030711014622fd8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "a3d2b7cf1ba23fc5c7efdea26bda56d6cc7415e2bf532fe87e570a4d6f999adf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "1bd27552e8da84e9ce83b95cdd6dca24e75cd0bfb42ce72083b3576be8212515"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "0622db568b1a7c77194f00a12b1ba73e807c4036d92abb8af9c4ac2527f3145c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "f4222be8e2a4416066096c5e005fac6b20d21c7d9c74a9ed0bcec2d6b6e58305"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "5acb56350aeba707a9bf646c26a70740f16928591f8543e0997321fca72c10b2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "7968dea2f5897bea55e094190270605ad0bb02c5bb1282a01bf154e570236951"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "2d7fc99b6d0599570fc136cd77b78f959fb4de023cf94d8f28cb19d43bfb205e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "e9fe20c2bcc112c0c4e379545bb68fb6031c9f2e39e7bef10d4ac3ab740f9913"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "35d148327cd34c0b41380c16da96f3366411d496865bb1f7e0fbd19b9a37d25c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "f69bf2f11267533c286d06720afe924658f6530e7b86099741c4f898c4cd04e7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "c9778ea4df457e7141f139efae4e5a7668aea9fb5991fa6d65cc3e85a2120aca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "06ea078c6002a267af0481c8040d5802fc2325a668691af13b861c7590cd07d8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "d1c7be67b402764a4c3562810ef81d16fc83b6c3fecc7acce38e9ca612676240"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "a43ede20cddbc884f97f9cd0fd118a51b145fce935c0a61b5e01026af33c7e41"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "dbd11f62de9951f4ecc73e05f10a874ea1515faf2dcbbeb4261c7ea76b6ae841"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "b14295544fe95f4657579b50cd435d4bcea7f8550695fff528cf7a93a698be06"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "5ba52fe43f5e5f28de8fa942cac11cb643e073f571ac55a1978a93f4496878e7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "679b03741b28f7e311f995f7cf8a94d4739019c83218723c805b03585e34d366"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "f378896f5e3f4e64df282223e96f530d41038d2e4a47ee0bc4c8108b790415d4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "431c0c85132778aa6c6908506b68358d82e4655b707cd59f1fad52a27356940e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "0253c5198d534b031fd4f7fc5d3e8f600fd952bd8e067b72b27af8421aa3be8e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "60886ef7c9617e84e07f8e2fff87d81e9aa8e576b13a176658e768ea766933c5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "115889cf86841268c6cf27d197793962f9a5b66cb41c458b9551a2caa216fdb2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "49769ed782fd387dc35a9047f9794f75836249a2a49bb64786cb2373e1a163e9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "b03554b62db6cbd27da9684c317979abb9dea8a3f4e88b6252a1c3d3696f8d28"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "b0e3fc888036d56ff67118621383c5a6834acaa258c2941acce36d2f38d5c05c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "31656721f73c20f751821c3437254c9a46126ebfdb9245338147d8d9fdae0e5c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "5c726e6f12b798ece478d49b818024e5a87f6aa2e0565173dcd076c816928784"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "d246710ca06245bce6945ec0dde4ba5f30a09c5cc198d9ffd243a31038291030"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "a38cc162c2a100ec6b3a7aa858b81a515e7c81a4e88861974f4d74400e2b16e4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "ca978224f02e064877cd11557b022f7b846483e5c4219ff613023a07a7023869"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "818b693e3ca8133f80cde0a7d86777e3b8315c8cf9ce1b783c1adae4d8416d2f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "f1f41f8a13593a5c0cf6275fc7f578955aff85d0e2e344cc25e5ac138c93d7d7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "3cb0e54f61cb1be14de63a46362f45bfcc4dbaf0ebe0e856f6b8d3d313131c7d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 41312, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "bc78a01d54c596c733a13033ed8ec4d8f50028e6cb3255dab9f9e8930d93ce6e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 41216, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "609d7d3f09f3e91a0001403414b7c2b159f1cc1cb32b4aaee755e26fdc44de6c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "6875c4eb9dafb35498b9643419a213923d20d270786049d49332b80f56973158"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "6432cd1dbd3e351ff5828f6c07731f6dac319840dd1785badbab7dad2f39b935"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "282cce8ca855a7958a83500cbb4ec2c992bf098e96a0902e1e4fc186347fe227"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "a2e618a7261d2ecbfeb1310eb1a1fad84585dba2d49accfac061d3757c82f3b3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "0f483583df88a7039f0acf8100797ab7e941097e457065db945daf259902a003"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "3b60d0c3eb676f9627958c43b6c212a90dcb6448d7b876c2695a68327d8f5a9b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "a398cd0de4522cf4c6185ff1b8aea44f4279295f0159c0dcd494bc162b7c6128"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "3bbbb83738dfd98eab3098b1625cf20181fc18d210da9744ed156dc24b354239"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "f9696297f87d6edd580416497d2b88d788093120c4c1c83add5a68d5f49c3b63"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "bca953084513c6951a471e2f1a63fce86ee7797fa413565d24c70d3d68f4981a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "457420b57b33554a6219751c86f3556979b2c236a1acfa99aecf1426cba0a3f7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "fd7009b4d59dd09dc0cff578086baed4c7ac1a9ef8df7e8bb1d8de76eafb21f3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "7cbb0b355e7ffc22478950342bff10676ddea181dda440425ff2cdc64210a30a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "c2e15eb992677872b8956c765fabeadb0d0e480e83df5b6dd23fc27866fabd90"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "17be5ca4c01b85ed5e36d62ed9168f2fef482961f4f0c9ca001e4edad09e1625"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "c2c79f26c692fbcbe88e3f51855b25d77fefb6d4861464aa19c536e089c5d926"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "99061ac096dc2eb43875e809258c54be6194d521a7dcc0be1efc8fbd786dd1d7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "e9cc5811246b40712a60ece651e28c32fe692aef75e0a3dc7f1b07c2dbcbc7b9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "f4883e002fbc61bb2d090ae4a17e40a5aa8a253632e634e4410b62ebc4ea24a7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "62eb2e5e9f8ca6d8970218b63f9478a23c301a12c19f717f13f5075abaf96ed2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "a38417f23cd58f5f298752355127e4ea45d1cb4747e7abf2764d9c4e57e68e61"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "be691e7a0a788cc276b2666d3c73d1aa492f981bcfd3746fd95b2634f241da5a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 42176, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "db2bf52ce7dc9510ae973c26cd280fdb17d6d085b90dd0db8b20acf02ccca081"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 42080, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "0b91252c8f527a51a19d28d47280f26725e0780d9acf42f6b48d29d37f39a881"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "50e7aeb88ee98b0bdf85f87cc911fbcb9ca1f61268d1e162addcb99f51314874"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "0667fae31ccd3b9816bf9f459f4e0f37b77c66a069043c00b2cc480403a603e4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "700d453ffc673eee02aa84c745bc1edfa15441ffab51e852d8b0e72c963eed30"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "4697ffc66e8fe3839e71b3b7ba0ef1226bbe4694b049991a4ed2145d4fdc11cb"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "5074099581e1f825c4e1efceb15b55f1859fb383bd4db4d1f1ba92b6c4ca036c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "d09bc9408db33deb176441bd2493faa35f61e4a213a0801678597c910e5db44e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "8ffd3e9396155bb679c90df58363c3e41029870d0c23491b0fa24280363f35a5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "9e66dd325343509f89e20c565ae6f11a2ff18332af33557b21ec759b803f7877"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "f6960265051c3e993fd20328ea5d91ff6656ca9915ff15b0b21192f81d62ef30"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "9e8bb1dc7ef742406b6fb451157c320dba37889a2a26c959e46b0d8350cef4ca"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 164224, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "e29269bada80d84d5c3fcbc41eaec5f5927c00d98a33a4beb44782b01802add2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 164128, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "cdb224427fa0d1be2e544c951fc98132bbcdaf4133d643d5ab098ff2f45d5fde"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "1ad8c68d2dddd053a3634466af8586884ca0779128cc8dc14377c3d71befc332"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "9faa2e692bc71a837023fff91a5239ca91bc2818430095dd5aa4602284000185"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "b9d26925ffa5d698b1444afd7eeb973e2972bcf232b9d84adef9a5f544167b11"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "2b955426b7785f6c0a9d24bc04cce9d70f5098c07eec6a37befbc0244e3819f4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "ea7cc2468bb50e2c9f724e8eba5116157c71db8af6ecb4dfdada05b4dd9ecf79"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "02510ecc71d0b024410f53fbaf2d4eb67ec7fb67fc2152635287e35d1bb6f962"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "46e5362f4a791ae1f0469015cacf2874b3c781b7db93551a7391b32a4aee8ac9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "39109f4dd152979da590e13da526f77ce7fc2c2c03cafee3c584d8d9d4444d71"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "1622f46ee22564f7c91cff0224952a2e58586862b3ac3e8b8026f7556a6b0d72"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "8717a15968c19c0b418303eed339461cb5ec50c63a8a7cd2b335df266be61b8b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "ecc1b3a768b337eb99e1865125663115e21705f4af38a1edabc7937c5d0eef9a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "18cecf0b4cd120a43bfd392c3298325554bf397f3f824683bbbd33d499800beb"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "a3888696ccce4f3b189d14e0fe4a1f87f5b609ac6811294d419f330334c3abc2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "c74e55f49f23f2716312698e324fb7aa9ad4b5b2488b21a1743222d39d9e4ab3"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "91212f4b404773a97a5311ed48f3eda2c91379eef22c853887612a01b8afc91e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "d055d9da010b004398c97cc137653a4771f77bfbb4a3dbb84f277a6c9dc7b1d8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "8de35015c0a3b4338938ca904ff6c08fb4f5e72a99a4061181d68ad0b0a4cfc7"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "96ecbe15d05fbf14e44ecd87a2c6853cf4ba0e2766dc2240f0d64e1e81bd7970"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "9f3af8590f37388e268428d824f13534cfe6c454116368ceb6815f04e0eceeec"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "7e703454367a2b7f270d74a74e0ff45180cd63fdd95960c309f20eea82dd2f7f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "09951432d8919bf195717ca14ca00a726b17f7c3003d6689af418fd9e1413d5b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "94cc0503374aa498f6cbb6b791a9de2e75bb6d4720b9721ede60817b4c514b63"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 165088, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "eafb4822337b16da80a37e551d5425b04c2d1b4cb8ca901cf2269475aad3dce8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 164992, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "4e3e7f31ba274a89d6378c53e903d277861258fb16a4764f6fa9f43995fdf2d4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "bee2424fc22a3dd1e7010f92bc0a07fd7b8d1281ec2d6dbd7649b3545975045d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "670978197188ab5466908f2c554bfa9ee67c4a9df6d35576b789ca04fa9ed247"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "dbb8ceeeab72425ae1fb963c1c543606c2423609013ce97fbd535324eca2f2c3"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "2b09f039f981a373a69f0733c981a6c2991faeaa175f19a9e36cca87931860a0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "04c6ad53808e74f986b4fbc6f1dea5437c71a6b602a6b4ce941934f15323123d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "9b9f5c1765aa5eb3c730784d2b366604e155a4abf587236106230bcc7ba846aa"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "c3d173aa5b15b05336d38a862a51c6a613b47fdf6f6c1cd6f778f4e419a925d4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "8419bf5c35381be9f7806e42b452867d643899248bb5ca5d98e708f0ad151b22"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "5c4c7c16bdb2dc6645cb056fb959d55858c257c1a1971abbc248e1e173fc372b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "62285fc02bcf4095482fea3341df544e9ebd5c567b6d0f2124577015f45c3ad1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "061862835ecbdcb3f588e07d0988b455c0643173674a32344254fc76b3d6e0c9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "f7e8d96ce19c7308830f566e08758ef7353080c8c10d2c83cb87272b056adc96"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "74eb4e319162e68db792f7515391b390a19c7659683aed4549994434bd91e8c0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "9d753f62c52cae5b040f2eb1ada36a223bd26c5ff49b8d3a21be4bdcc9fea374"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "aff81512ffddbf030a32fd7b5a67583b046917463b53cea3da0b2031781f9bf9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "7b9cd8f7c35408289aae9526e5eaf5c4acedf43fcbf0e440d823894184c86f45"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "a63934fa5733e4aa6f0d3be4d796e9edfaac6e0f9966f8602ea5976e20f82b17"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "d5389df999b7fd199bcee1e75616525b1e6ff5d15ee64ed4be42d67faf8a8147"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "d0d9aa88788e2285d46b0e8a26d87c6b013821ec446b8077e6fcb7fabd12ccb1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "58c9614a637ed72e07c93359cb4faa3352f36a50a21b98e2694068fc68eb0cf9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, true, "3e0eb5e618d978ed315ccfab0f50c94eaeb614d8911582660bda6bdbd079f5a9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, true, "99835ab0c3f5cedf78f762810219b9e511923d4826aa7ab17973727d11c7efe5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "049a3f15a9890cbf0375afc60b3de63b9ce8efcc0255beae3fa1d8feecb3a6e9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "077f5ad5de0346de1df92d5d9329d6254f2484bf73a309cf463b7c526c97992b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, true, "fa88c90f09e5a86ecce3baa02da1c274cc28e27595f4ab83c8f33fe3c93f6ae8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, true, "465b04736c5ee325d44ab7ab1f9aea15ace209554986657713a6b3b7a2686686"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "3b32195e62339b09e116f214a9302a5a74ca34465457cdb536d211f571fd69c1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "b5e343e5b39a24edfb0012028f3b0d02420b02392e434ff07a0b800edaba0b4e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128PersistentContext", 82272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, true, "3514d43e339c1242a7fd077874b7b7e1155d5f4b89b6e5ba4c8ed9196119e4af"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqSkipsSoftmaxQ128Kv128StaticContext", 82176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, true, "f26a96a8a0ee48ecf4baeab8046e90fd3bd998b6daf94ed781dad47069a043da"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "c0e04d3cb97dd96fccc08072be172ab863e9c72c306cb878fb0749f782d97350"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "f7d98b455a53226ddbc00bd4b3b4f591fa0b3f46e48fb0fdf70901e9fec00dc8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, true, "bc083a50261010398b714b04b50e14197544dee92e7cfba799ebf22b909c8ffd"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, true, "6080821464a052a73836e78a9de21e2de4768d4107fc7b851ee29cf35c0c4872"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "980d470da19118113a4e39e0f61363c0ee7ad81e24b758295b69bd7b8343e2ff"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "c7dceda7e2c3ae12636ff43632b32ef5d0bcb8e4b922aae9427f6c9cb25171e3"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, true, "4fec636ca76e8c2cfab941394bcff884a80534a35d4bb951fa961d6d36ba12e7"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, true, "f31bef86bc97b23e7d7ae989cc7a6aa4dc5802b5d561266ec502bfbac48b5dbc"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "c139170df8436631fff4156e8c1a446f4c92ca059a3aaf04fea4798d1b7def5d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "7f1fcb989d3fb27bbd818210774afffb8d3d521bcb8c2e5bc2f5a7ecc0ea0cb4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, true, "86ba9c4ae95241e4f60951698905032b7218bd58e9c47885a7097bdc5ebf9d17"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, true, "2ba426572d6c98f36c18216aee097b5362d7553bd37cedace34e58446524ab2f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "23eeb96d23e0bab8667420b82cbb20f267efad6b7888eb9fbb9d8a4abf276dad"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "a55866ece172918a27cccac4ab608469d99e4396bb1e3137010389c37238d5ca"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, true, "afc600bd9a185d778edc4f5b9d740b103c5c08afda5690b2cffdbd7e5bd50739"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, true, "82e648a2cbea18fa089d54e9afcfe0b33d8b6f898fe477fb72b42a2130af4cb4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "01ffdf5251509277cb709fb94e0366f3488b56d731ca4ca214f131624886d10f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "7c725ea9242b5f114edda0a19518f569947cd89a607142bdc62aafe5a61bb728"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, true, "6d99aef289e63deb44ff5825ac86270f1277caa09560b41c17e76b394c3c975d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, true, "941d065ced27d9ba7f5b27a183557d4563883718e227e7b6e0bc3b1e5b01216c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "469657a2b90d5c35c2d00b71c7b9f5d5d935bd71696391a1ad875ee9e5d99429"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "09cae8e1b45c004d14177b56f0e48b0eb93d55c8b7a98741e2e0b9fa9746efe3"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128PersistentContext", 83136, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, true, "e75834f7c8652ddd24250cfec1d740d299b0cf1b67c86b05bff7549bec49bd2f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqSkipsSoftmaxQ128Kv128StaticContext", 83040, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, true, "9b8df44a5dd25b282dde3d6f926bc4e13709f632ff8018c77f78540b9d9e617d"}, #endif // EXCLUDE_SM_100 }; // clang-format on diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/fmhaKernels.h b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/fmhaKernels.h index 6208767157..224ff51295 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/fmhaKernels.h +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/fmhaKernels.h @@ -42,6 +42,18 @@ namespace kernels //////////////////////////////////////////////////////////////////////////////////////////////////// +// Check if two SM values are family/specific versions of the same architecture +// Returns true only if one is a family version and the other is a compatible specific version +constexpr bool isFamilySpecificSMPair(int sm1, int sm2) +{ + if ((sm1 == kSM_100f && (sm2 == kSM_100 || sm2 == kSM_103)) + || (sm2 == kSM_100f && (sm1 == kSM_100 || sm1 == kSM_103))) + { + return true; + } + return false; +} + constexpr bool isSMCompatible(int gpuSM, int kernelSM) { if (gpuSM == kSM_103) @@ -107,9 +119,24 @@ public: CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, kernelMeta.mSharedMemBytes)); } // Make sure the hashIds are not duplicated. - TLLM_CHECK_WITH_INFO(mFunctions.find(hashID(kernelMeta)) == mFunctions.end(), - "The kernel's hashId has conflicts with others."); - mFunctions.insert(std::make_pair(hashID(kernelMeta), funcInfo)); + // Except for the case where we have both family version and specific version of the same config. + auto const hash = hashID(kernelMeta); + auto it = mFunctions.find(hash); + if (it != mFunctions.end()) + { + auto const& existingKernelMeta = mKernelMeta[it->second.mMetaInfoIndex]; + TLLM_CHECK_WITH_INFO(isFamilySpecificSMPair(existingKernelMeta.mSM, kernelMeta.mSM), + "The kernel's hashId has conflicts with others."); + // Prefer specific SM version over family version + if (existingKernelMeta.mSM == kSM_100f) + { + it->second = funcInfo; + } + } + else + { + mFunctions[hash] = funcInfo; + } } } } diff --git a/tests/integration/defs/accuracy/references/gsm8k.yaml b/tests/integration/defs/accuracy/references/gsm8k.yaml index e24f54f7dc..f79a0eb0a6 100644 --- a/tests/integration/defs/accuracy/references/gsm8k.yaml +++ b/tests/integration/defs/accuracy/references/gsm8k.yaml @@ -42,17 +42,17 @@ meta-llama/Llama-4-Scout-17B-16E-Instruct: deepseek-ai/DeepSeek-V3-Lite: - accuracy: 64.74 - quant_algo: NVFP4 - accuracy: 62.14 # WAR: nvbugs/5503479 + accuracy: 63.71 - quant_algo: NVFP4 kv_cache_quant_algo: FP8 - accuracy: 62.14 # WAR: nvbugs/5503479 + accuracy: 63.71 - quant_algo: NVFP4 spec_dec_algo: MTP - accuracy: 62.14 # WAR: nvbugs/5503479 + accuracy: 63.71 - quant_algo: NVFP4 kv_cache_quant_algo: FP8 spec_dec_algo: MTP - accuracy: 62.14 # WAR: nvbugs/5503479 + accuracy: 63.71 - quant_algo: FP8_BLOCK_SCALES accuracy: 64.74 - quant_algo: FP8_BLOCK_SCALES diff --git a/tests/unittest/llmapi/test_llm_multi_gpu_pytorch.py b/tests/unittest/llmapi/test_llm_multi_gpu_pytorch.py index 20dceb1216..1ea15a2e78 100644 --- a/tests/unittest/llmapi/test_llm_multi_gpu_pytorch.py +++ b/tests/unittest/llmapi/test_llm_multi_gpu_pytorch.py @@ -66,6 +66,7 @@ def test_llama_7b_multi_lora_tp2(): cuda_graph_config=None) +@skip_ray @pytest.mark.gpu2 def test_phi3_lora_fused_modules_output_on_tp2_identical_to_tp1() -> None: check_phi3_lora_fused_modules_output_tp2_identical_to_tp1(