[BugFix] [GDN] Read linear_key_head_dim from hf_text_config for multimodal models (#43978)

Signed-off-by: IdoAtadTD <ido.atad@twodelta.com>
This commit is contained in:
IdoAtadTD
2026-06-02 16:17:55 +02:00
committed by GitHub
parent ea0d045a05
commit c91a87f01a
@@ -177,7 +177,7 @@ def _resolve_gdn_prefill_backend(
return backend, "triton"
head_k_dim = getattr(
-vllm_config.model_config.hf_config, "linear_key_head_dim", None
+vllm_config.model_config.hf_text_config, "linear_key_head_dim", None
)
supports_flashinfer = False
@@ -218,7 +218,7 @@ def _log_gdn_backend_decision(
) -> None:
"""Log the GDN prefill backend choice in the attention-selector style."""
head_k_dim = getattr(
-vllm_config.model_config.hf_config, "linear_key_head_dim", None
+vllm_config.model_config.hf_text_config, "linear_key_head_dim", None
)
chosen = {
"flashinfer": "FlashInfer",