diff --git a/vllm/model_executor/layers/mamba/gdn/qwen_gdn_linear_attn.py b/vllm/model_executor/layers/mamba/gdn/qwen_gdn_linear_attn.py index 92fc6442ced..7a0d50c74e3 100644 --- a/vllm/model_executor/layers/mamba/gdn/qwen_gdn_linear_attn.py +++ b/vllm/model_executor/layers/mamba/gdn/qwen_gdn_linear_attn.py @@ -177,7 +177,7 @@ def _resolve_gdn_prefill_backend( return backend, "triton" head_k_dim = getattr( - vllm_config.model_config.hf_config, "linear_key_head_dim", None + vllm_config.model_config.hf_text_config, "linear_key_head_dim", None ) supports_flashinfer = False @@ -218,7 +218,7 @@ def _log_gdn_backend_decision( ) -> None: """Log the GDN prefill backend choice in the attention-selector style.""" head_k_dim = getattr( - vllm_config.model_config.hf_config, "linear_key_head_dim", None + vllm_config.model_config.hf_text_config, "linear_key_head_dim", None ) chosen = { "flashinfer": "FlashInfer",