mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
* add fmha repo Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * fix format Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * fix code style Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * fix header Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * fix header kernel_traits.h Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * add .gitignore file Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * add SLIDING_WINDOW_ATTENTION Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * fix style Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * fix format Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * update setup.py Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> * update build_wheel.py Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> --------- Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com> Signed-off-by: qsang-nv <200703406+qsang-nv@users.noreply.github.com>
106 lines
4.7 KiB
Bash
Executable File
106 lines
4.7 KiB
Bash
Executable File
# FP16: sweep the batch size (-b) over fixed and variable sequence lengths.
#
# Each "S D" entry in the list below supplies the values for -s and -d.
# For every entry the test binary is launched four times, in this order:
#   -min-s S -b 1
#   -min-s S -b 128
#   -min-s 1 -b 1
#   -min-s 1 -b 128
# NOTE(review): -min-s equal to -s is presumably the fixed-seqlen case and
# -min-s 1 the variable-seqlen case -- confirm against fmha.exe's option help.

for shape in "384 64" "384 32" "256 64" "256 32" "128 64" "128 32" "96 64" "64 64" "32 64"; do
    s_arg=${shape% *}   # text before the space: value passed to -s
    d_arg=${shape#* }   # text after the space: value passed to -d
    for min_s_arg in "$s_arg" 1; do
        for b_arg in 1 128; do
            bin/fmha.exe -v 0 -runs 1 -s "$s_arg" -d "$d_arg" -min-s "$min_s_arg" -b "$b_arg"
        done
    done
done

# FP16: sweep the batch size (-b) over fixed and variable sequence lengths,
# using the longer sequence lengths intended for flash attention.
# NOTE: HALF_ACCUMULATION_FOR_FLASH_ATTENTION has larger epsilon, so every
# run in this section passes -epsilon 0.02.
#
# Each "S D B" entry supplies the values for -s and -d plus the larger of the
# two batch sizes (128 for s=512/1024, 32 for s=2048). For every entry the
# test binary is launched four times, in this order:
#   -min-s S -b 1
#   -min-s S -b B
#   -min-s 1 -b 1
#   -min-s 1 -b B

for spec in "512 64 128" "512 32 128" "1024 64 128" "1024 32 128" "2048 64 32" "2048 32 32"; do
    s_arg=${spec%% *}       # first field: value passed to -s
    tail_fields=${spec#* }  # remaining "D B" fields
    d_arg=${tail_fields% *} # second field: value passed to -d
    big_b=${tail_fields#* } # third field: the larger batch size
    for min_s_arg in "$s_arg" 1; do
        for b_arg in 1 "$big_b"; do
            bin/fmha.exe -v 0 -runs 1 -s "$s_arg" -d "$d_arg" -min-s "$min_s_arg" -b "$b_arg" -epsilon 0.02
        done
    done
done

# FP16: sweep the batch size (-b) with the -v1 flag set.
#
# Runs every combination of -s in {512, 384, 256, 128}, -d in {64, 32} and
# -b in {1, 128}; -s varies slowest and -b fastest. No -min-s is passed in
# this section.
# NOTE(review): -v1 presumably selects the v1 kernels, and omitting -min-s
# presumably means fixed sequence length only -- confirm against fmha.exe.

for s_arg in 512 384 256 128; do
    for d_arg in 64 32; do
        for b_arg in 1 128; do
            bin/fmha.exe -v 0 -runs 1 -s "$s_arg" -d "$d_arg" -v1 -b "$b_arg"
        done
    done
done