[None][feat] Support new Transformers RoPE configuration format (#10636)
Signed-off-by: lkm2835 <lkm2835@gmail.com>
commit 25148d3fee (parent e9817461ba)
@@ -24,6 +24,14 @@ from ..metadata import KVCacheParams
 from ..pyexecutor.resource_manager import KVCacheManager
 from ..utils import get_model_extra_attrs
+
+try:
+    # Transformers v5
+    from transformers.configuration_utils import ALLOWED_ATTENTION_LAYER_TYPES
+except ImportError:
+    # Transformers v4
+    from transformers.configuration_utils import \
+        ALLOWED_LAYER_TYPES as ALLOWED_ATTENTION_LAYER_TYPES
 
 
 @dataclass
 class AttentionRuntimeFeatures:
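Context for the try/except above: Transformers v5 renamed the v4 constant ALLOWED_LAYER_TYPES to ALLOWED_ATTENTION_LAYER_TYPES, and aliasing it at import time lets the rest of the module use a single name under either release. Below is a minimal sketch (not part of the commit) showing the aliased constant in use for the per-layer-type check the second hunk relies on; is_per_layer_type_dict is a hypothetical helper, and the layer-type strings in the example inputs are assumptions about the constant's contents, not quoted from transformers.

try:
    # Transformers v5 exports the renamed symbol directly.
    from transformers.configuration_utils import ALLOWED_ATTENTION_LAYER_TYPES
except ImportError:
    # Transformers v4 only has the old name; alias it so the rest of the
    # module can use a single identifier under both versions.
    from transformers.configuration_utils import \
        ALLOWED_LAYER_TYPES as ALLOWED_ATTENTION_LAYER_TYPES

def is_per_layer_type_dict(d):
    # True when every key is a recognized attention layer type, i.e. the
    # dict is keyed per layer type rather than holding flat RoPE fields.
    return set(d.keys()).issubset(ALLOWED_ATTENTION_LAYER_TYPES)

# Illustrative inputs: "full_attention"/"sliding_attention" are assumed
# layer-type strings; "rope_type"/"rope_theta" are flat RoPE fields.
print(is_per_layer_type_dict({"full_attention": {}, "sliding_attention": {}}))
print(is_per_layer_type_dict({"rope_type": "default", "rope_theta": 1e4}))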
@@ -448,6 +456,13 @@ class RopeParams:
     def from_config(config) -> "RopeParams":
         rope_params = RopeParams()
+
+        hf_rope_parameters = getattr(config, 'rope_parameters', None)
+        if hf_rope_parameters is not None:
+            assert not set(hf_rope_parameters.keys()).issubset(
+                ALLOWED_ATTENTION_LAYER_TYPES), (
+                    "Per-layer-type RoPE configuration is not supported yet.")
+            config.update(hf_rope_parameters)
 
         # get rotary parameters.
         hidden_size = config.hidden_size
         num_attention_heads = config.num_attention_heads
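For context, the new Transformers format nests RoPE fields in a rope_parameters dict instead of flat attributes such as rope_theta. from_config detects that dict, rejects the per-layer-type variant (a dict keyed by attention layer type), and flattens the remaining fields onto the config via update() so the pre-existing parsing below keeps working unchanged. A hedged sketch of that flattening, assuming transformers is installed; the config values and RoPE field names here are illustrative, not taken from a real model.

from transformers import PretrainedConfig

# Hypothetical model config carrying the new-style nested RoPE dict.
cfg = PretrainedConfig(
    hidden_size=4096,
    num_attention_heads=32,
    rope_parameters={"rope_type": "linear", "rope_theta": 10000.0, "factor": 2.0},
)

hf_rope_parameters = getattr(cfg, "rope_parameters", None)
if hf_rope_parameters is not None:
    # Flatten: after update() the nested fields are plain config attributes,
    # so downstream code can keep reading config.rope_theta etc. unchanged.
    cfg.update(hf_rope_parameters)

print(cfg.rope_theta, cfg.factor)  # 10000.0 2.0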