mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-04 18:21:52 +08:00
[None][chore] Revert NVIDIA/TensorRT-LLM#10847 (#10869)
This commit is contained in:
parent
c381790d15
commit
ccf4d79c6c
@ -334,11 +334,11 @@ class Attention(nn.Module):
|
||||
key="sparse_attention_config")
|
||||
|
||||
if config.sparse_attention_config.algorithm == "rocket":
|
||||
logger.info_once("disable rope_fusion for RocketKV.")
|
||||
logger.warning("disable rope_fusion for RocketKV.")
|
||||
self.rope_fusion = False
|
||||
|
||||
if self.rope_fusion and not attn_cls.support_fused_rope():
|
||||
logger.info_once(
|
||||
logger.warning(
|
||||
"rope_fusion is true but the attention backend does not support it. Will disable rope_fusion."
|
||||
)
|
||||
self.rope_fusion = False
|
||||
|
||||
Loading…
Reference in New Issue
Block a user