diff --git a/tensorrt_llm/_torch/attention_backend/interface.py b/tensorrt_llm/_torch/attention_backend/interface.py
index 677a441c40..71f7c8be67 100644
--- a/tensorrt_llm/_torch/attention_backend/interface.py
+++ b/tensorrt_llm/_torch/attention_backend/interface.py
@@ -24,6 +24,14 @@ from ..metadata import KVCacheParams
 from ..pyexecutor.resource_manager import KVCacheManager
 from ..utils import get_model_extra_attrs
 
+try:
+    # Transformers v5
+    from transformers.configuration_utils import ALLOWED_ATTENTION_LAYER_TYPES
+except ImportError:
+    # Transformers v4
+    from transformers.configuration_utils import \
+        ALLOWED_LAYER_TYPES as ALLOWED_ATTENTION_LAYER_TYPES
+
 
 @dataclass
 class AttentionRuntimeFeatures:
@@ -448,6 +456,13 @@ class RopeParams:
     def from_config(config) -> "RopeParams":
         rope_params = RopeParams()
 
+        hf_rope_parameters = getattr(config, 'rope_parameters', None)
+        if hf_rope_parameters is not None:
+            assert not set(hf_rope_parameters.keys()).issubset(
+                ALLOWED_ATTENTION_LAYER_TYPES), (
+                    "Per-layer-type RoPE configuration is not supported yet.")
+            config.update(hf_rope_parameters)
+
         # get rotary parameters.
         hidden_size = config.hidden_size
         num_attention_heads = config.num_attention_heads
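
The sketch below illustrates, outside the patch itself, how the version-compatible import and the per-layer-type check in `RopeParams.from_config` are expected to behave. The final hardcoded fallback tuple and the helper name `is_per_layer_type_rope` are assumptions added for self-containment and illustration; they are not part of the patch or of the Transformers API beyond the two imported names shown in the diff.

```python
# Illustrative sketch only, not part of the patch.
try:
    # Transformers v5 name (as used in the patch)
    from transformers.configuration_utils import ALLOWED_ATTENTION_LAYER_TYPES
except ImportError:
    try:
        # Transformers v4 name (as used in the patch)
        from transformers.configuration_utils import \
            ALLOWED_LAYER_TYPES as ALLOWED_ATTENTION_LAYER_TYPES
    except ImportError:
        # Assumed, non-exhaustive values so the sketch runs without
        # transformers installed.
        ALLOWED_ATTENTION_LAYER_TYPES = ("full_attention", "sliding_attention")


def is_per_layer_type_rope(rope_parameters: dict) -> bool:
    """Return True when every key of `rope_parameters` is an attention layer
    type, i.e. the config carries one RoPE setting per layer type rather than
    a single flat setting. This is the shape the patch asserts against before
    merging `rope_parameters` into the config with `config.update(...)`."""
    return set(rope_parameters.keys()).issubset(ALLOWED_ATTENTION_LAYER_TYPES)


# Flat RoPE settings: supported, merged into the config as extra attributes.
print(is_per_layer_type_rope({"rope_type": "default",
                              "rope_theta": 10000.0}))  # False

# Per-layer-type RoPE settings: the patch raises an AssertionError here.
print(is_per_layer_type_rope({
    "full_attention": {"rope_type": "default", "rope_theta": 10000.0},
    "sliding_attention": {"rope_type": "default", "rope_theta": 10000.0},
}))  # True
```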