mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
feat: add PositionEmbeddingType=0 to XQA support (#4934)
Signed-off-by: Jiying Dong <87510204+dongjiyingdjy@users.noreply.github.com>
This commit is contained in:
parent
bfa877a22e
commit
51652b9b2b
@ -68,15 +68,14 @@ CubinObj CompileEngine::compile() const
|
||||
case PositionEmbeddingType::kROPE_GPTJ: ropeStyle = tllmXqaJitRopeStyle::TLLM_XQA_JIT_ROPE_GPTJ; break;
|
||||
case PositionEmbeddingType::kROPE_GPT_NEOX:
|
||||
case PositionEmbeddingType::kLONG_ROPE: ropeStyle = tllmXqaJitRopeStyle::TLLM_XQA_JIT_ROPE_NEOX; break;
|
||||
// For kROPE_M, set ropeStyle to TLLM_XQA_JIT_ROPE_NONE to let XQA kernel not apply RoPE.
|
||||
// At runtime, a separate kernel (see invokeQKVPreprocessing) will be launched to apply RoPE.
|
||||
case PositionEmbeddingType::kROPE_M: ropeStyle = tllmXqaJitRopeStyle::TLLM_XQA_JIT_ROPE_NONE; break;
|
||||
default: TLLM_THROW("TllmXqaJit: Bad RoPE type");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Make it explicit that Ampere-style kernel doesn't apply RoPE in the kernel.
|
||||
// For kROPE_M, set ropeStyle to TLLM_XQA_JIT_ROPE_NONE to let XQA kernel not apply RoPE.
|
||||
// At runtime, a separate kernel (see invokeQKVPreprocessing) will be launched to apply RoPE.
|
||||
ropeStyle = tllmXqaJitRopeStyle::TLLM_XQA_JIT_ROPE_NONE;
|
||||
}
|
||||
if (applyRoPEInXqaKernel)
|
||||
|
||||
@ -62,8 +62,9 @@ bool supportConfigCommon(XQAParams const& xqaParams, bool forConfigurePlugin)
|
||||
// TODO: remove this when the kernel bug for num_kv_heads <= 128 gets fixed.
|
||||
return false;
|
||||
}
|
||||
if (!contains({PositionEmbeddingType::kROPE_GPTJ, PositionEmbeddingType::kROPE_GPT_NEOX,
|
||||
PositionEmbeddingType::kROPE_M, PositionEmbeddingType::kLONG_ROPE},
|
||||
if (!contains(
|
||||
{PositionEmbeddingType::kROPE_GPTJ, PositionEmbeddingType::kROPE_GPT_NEOX, PositionEmbeddingType::kROPE_M,
|
||||
PositionEmbeddingType::kLONG_ROPE, PositionEmbeddingType::kLEARNED_ABSOLUTE},
|
||||
xqaParams.position_embedding_type))
|
||||
{
|
||||
return false;
|
||||
|
||||
Loading…
Reference in New Issue
Block a user