mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[XPU] Fall back to TRITON_ATTN for ViT attention on XPU when using float32 dtype (#43759)
Signed-off-by: Yan Ma <yan.ma@intel.com>
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
@@ -110,6 +110,13 @@ class XPUPlatform(Platform):
|
||||
dtype: torch.dtype,
|
||||
backend: "AttentionBackendEnum | None" = None,
|
||||
) -> "AttentionBackendEnum":
|
||||
if dtype == torch.float32:
|
||||
logger.warning_once(
|
||||
"Flash Attention on XPU does not support float32 dtype. "
|
||||
"Falling back to Triton Attention backend for vit attention."
|
||||
)
|
||||
return AttentionBackendEnum.TRITON_ATTN
|
||||
|
||||
if backend is not None:
|
||||
assert backend in cls.get_supported_vit_attn_backends(), (
|
||||
f"Backend {backend} is not supported for vit attention. "
|
||||
|
||||
Reference in New Issue
Block a user