[BugFix] [GDN] Read linear_key_head_dim from hf_text_config for multimodal models (#43978)

Signed-off-by: IdoAtadTD <ido.atad@twodelta.com>
2026-06-06 00:16:14 +00:00 · 2026-06-02 16:17:55 +02:00
parent ea0d045a05
commit c91a87f01a
1 changed file with 2 additions and 2 deletions
@@ -177,7 +177,7 @@ def _resolve_gdn_prefill_backend(
        return backend, "triton"

    head_k_dim = getattr(
-        vllm_config.model_config.hf_config, "linear_key_head_dim", None
+        vllm_config.model_config.hf_text_config, "linear_key_head_dim", None
    )

    supports_flashinfer = False
@@ -218,7 +218,7 @@ def _log_gdn_backend_decision(
 ) -> None:
    """Log the GDN prefill backend choice in the attention-selector style."""
    head_k_dim = getattr(
-        vllm_config.model_config.hf_config, "linear_key_head_dim", None
+        vllm_config.model_config.hf_text_config, "linear_key_head_dim", None
    )
    chosen = {
        "flashinfer": "FlashInfer",