mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[BugFix] [GDN] Read linear_key_head_dim from hf_text_config for multimodal models (#43978)
Signed-off-by: IdoAtadTD <ido.atad@twodelta.com>
This commit is contained in:
@@ -177,7 +177,7 @@ def _resolve_gdn_prefill_backend(
         return backend, "triton"

     head_k_dim = getattr(
-        vllm_config.model_config.hf_config, "linear_key_head_dim", None
+        vllm_config.model_config.hf_text_config, "linear_key_head_dim", None
     )

     supports_flashinfer = False
@@ -218,7 +218,7 @@ def _log_gdn_backend_decision(
 ) -> None:
     """Log the GDN prefill backend choice in the attention-selector style."""
     head_k_dim = getattr(
-        vllm_config.model_config.hf_config, "linear_key_head_dim", None
+        vllm_config.model_config.hf_text_config, "linear_key_head_dim", None
     )
     chosen = {
         "flashinfer": "FlashInfer",
Reference in New Issue
Block a user