TensorRT-LLM/cpp/kernels/fmha_v2/src/fmha/alibi_params.h
qsang-nv 0fd59d64ab
infra: open source fmha v2 kernels (#4185)
* add fmha repo

Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com>

* fix format

Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com>

* fix code style

Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com>

* fix header

Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com>

* fix header kernel_traits.h

Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com>

* add .gitignore file

Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com>

* add SLIDING_WINDOW_ATTENTION

Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com>

* fix style

Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com>

* fix format

Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com>

* update setup.py

Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com>

* update build_wheel.py

Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com>

---------

Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com>
Signed-off-by: qsang-nv <200703406+qsang-nv@users.noreply.github.com>
2025-05-15 10:56:34 +08:00

57 lines
1.7 KiB
C++

/*
* SPDX-FileCopyrightText: Copyright (c) 2011-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: NVIDIA TensorRT Source Code License Agreement
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
namespace fmha
{
// Parameters for ALiBi (Attention with Linear Biases) position encoding.
// Holds the per-head slope base (-4 / h_pow_2) plus offsets used when the
// heads / sequence are sharded across tensor-parallel ranks.
struct AlibiParams
{
    // Returns the largest power of two that is <= x (for positive x).
    // Smears the highest set bit of x into every lower bit position, then
    // subtracts the lower half so only the top bit survives.
    // NOTE(review): x == 0 yields 0; negative x relies on arithmetic
    // right-shift behavior — callers are expected to pass a positive head count.
    constexpr static int round_down_to_power_two(int x)
    {
        for (int shift = 1; shift <= 16; shift <<= 1)
        {
            x = x | (x >> shift);
        }
        return x - (x >> 1);
    }

    AlibiParams() = default;

    // h: total number of attention heads; scale_after_alibi: extra scaling
    // factor applied after the alibi bias is added.
    AlibiParams(int h, float scale_after_alibi = 1.f)
        : scale_after_alibi(scale_after_alibi)
    {
        h_pow_2 = round_down_to_power_two(h);
        // Slope base shared by all heads; each head derives its own slope
        // downstream from this value. (h == 0 would produce -inf here.)
        alibi_neg4_div_h = -4.0f / static_cast<float>(h_pow_2);
    }

    // Tensor-parallel variant: h/s are the per-rank head count and sequence
    // length; the slope base is computed from the global head count h * tp_size,
    // and per-rank offsets locate this rank's slice of heads and positions.
    AlibiParams(int h, int s, int tp_size, int rank, float scale_after_alibi = 1.f)
        : AlibiParams(h * tp_size, scale_after_alibi)
    {
        head_idx_offset = h * rank;
        sequence_pos_offset = s * rank;
    }

    int h_pow_2{};
    float alibi_neg4_div_h{};
    float scale_after_alibi{};
    // Could be simplified to `int rank` derive the others as `num_heads * rank, s * rank` at
    // runtime, but this makes assumptions about the layout downstream
    // (e.g. downstream may only split across the head dimension, so s would be the full sequence)
    int head_idx_offset = 0;
    int sequence_pos_offset = 0;
};
} // namespace fmha