From 3fcfab08838b6e0a6df745e74b99ee73a308d6ca Mon Sep 17 00:00:00 2001
From: Faraz Khoubsirat <58580514+farazkh80@users.noreply.github.com>
Date: Wed, 10 Sep 2025 02:19:06 +0000
Subject: [PATCH] Qwen cherrypick2

Signed-off-by: Faraz Khoubsirat <58580514+farazkh80@users.noreply.github.com>
---
 tensorrt_llm/_torch/models/modeling_qwen2vl.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/tensorrt_llm/_torch/models/modeling_qwen2vl.py b/tensorrt_llm/_torch/models/modeling_qwen2vl.py
index a5b478f817..92d1f7e224 100644
--- a/tensorrt_llm/_torch/models/modeling_qwen2vl.py
+++ b/tensorrt_llm/_torch/models/modeling_qwen2vl.py
@@ -4,8 +4,9 @@ from typing import Any, Dict, List, Optional, Tuple, Union
 
 import torch
 import torch.nn as nn
-from transformers import (AutoProcessor, AutoTokenizer, PretrainedConfig,
-                          PreTrainedModel, Qwen2_5_VLForConditionalGeneration,
+from transformers import (AutoConfig, AutoProcessor, AutoTokenizer,
+                          PretrainedConfig, PreTrainedModel,
+                          Qwen2_5_VLForConditionalGeneration,
                           Qwen2VLForConditionalGeneration)
 from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import \
     Qwen2_5_VisionTransformerPretrainedModel
@@ -362,9 +363,10 @@ class Qwen2VisionModelBase(nn.Module):
         # Currently, copying vision encoder on all devices.
         # NOTE: Using attn_implementation='flash_attention_2' to avoid the issue of vision model's GPU OOM.
         hf_model_config = AutoConfig.from_pretrained(model_path)
-        vision_model = model_class(config=hf_model_config.vision_config,
-                                   torch_dtype=pretrained_config.torch_dtype,
-                                   attn_implementation='flash_attention_2')
+        vision_model = model_class._from_config(
+            hf_model_config.vision_config,
+            torch_dtype=pretrained_config.torch_dtype,
+            attn_implementation='flash_attention_2')
         # TODO: Make vision model compatible with meta init mode and load_weights at the same place
         self.visual = vision_model.to(self.device)
         self.post_config()
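
Note (not part of the patch itself): below is a minimal standalone sketch of
the construction pattern the second hunk switches to. `PreTrainedModel._from_config`
is the Transformers classmethod that builds a model from a config without
loading checkpoint weights, and it applies `torch_dtype` and
`attn_implementation` during construction; passing those as keyword arguments
to the plain `model_class(config=...)` constructor does not reliably apply
them, which appears to be the motivation for the change. The checkpoint name
and the bfloat16 dtype below are hypothetical, for illustration only.

    import torch
    from transformers import AutoConfig
    from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import \
        Qwen2_5_VisionTransformerPretrainedModel

    # Hypothetical checkpoint path, for illustration.
    model_path = "Qwen/Qwen2.5-VL-7B-Instruct"
    hf_model_config = AutoConfig.from_pretrained(model_path)

    # _from_config initializes the module from the config alone (no weight
    # loading) and honors torch_dtype / attn_implementation, matching the
    # patch's NOTE about using flash_attention_2 to avoid vision-encoder OOM.
    vision_model = Qwen2_5_VisionTransformerPretrainedModel._from_config(
        hf_model_config.vision_config,
        torch_dtype=torch.bfloat16,
        attn_implementation='flash_attention_2')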