[nvbug/5387226] chore: add propagation for trust_remote_code to AutoConfig (#6001)

Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com>
Yan Chunwei 2025-07-16 16:05:38 +08:00 committed by GitHub
parent 763012a88a
commit 7568deb2f1
6 changed files with 20 additions and 31 deletions

View File

@@ -1286,7 +1286,8 @@ class BaseLlmArgs(BaseModel):
                 'pytorch', '_autodeploy'
         ]:
             # Load parallel_config from the engine.
-            model_format = get_model_format(self.model)
+            model_format = get_model_format(
+                self.model, trust_remote_code=self.trust_remote_code)

             if model_format is _ModelFormatKind.TLLM_ENGINE:
                 if self.build_config is not None:
@@ -2083,7 +2084,8 @@ def update_llm_args_with_extra_options(llm_args: Dict,
     return llm_args


-def get_model_format(model_dir: str) -> _ModelFormatKind:
+def get_model_format(model_dir: str,
+                     trust_remote_code: bool = False) -> _ModelFormatKind:
     ''' Get the format of the model. '''
     if not (Path(model_dir) / 'config.json').exists():
         raise ValueError(
@@ -2102,7 +2104,8 @@ def get_model_format(model_dir: str) -> _ModelFormatKind:
             PretrainedConfig.from_checkpoint(model_dir)
         else:
             model_format = _ModelFormatKind.HF
-            AutoConfig.from_hugging_face(model_dir)
+            AutoConfig.from_hugging_face(model_dir,
+                                         trust_remote_code=trust_remote_code)
     except Exception as e:
         raise ValueError(
             f"Inferred model format {model_format}, but failed to load config.json: {e}"

View File

@@ -5,7 +5,6 @@ import shutil
 import tempfile
 import time
 import weakref
-from argparse import Namespace
 from dataclasses import asdict, dataclass, field
 from pathlib import Path
 from typing import Callable, List, Optional, Tuple, Union
@@ -35,7 +34,7 @@ from .llm_args import (CalibConfig, CudaGraphConfig, DraftTargetDecodingConfig,
                        LookaheadDecodingConfig, MedusaDecodingConfig,
                        MTPDecodingConfig, NGramDecodingConfig,
                        UserProvidedDecodingConfig, _ModelFormatKind,
-                       _ModelWrapper, _ParallelConfig, get_model_format,
+                       _ModelWrapper, _ParallelConfig,
                        update_llm_args_with_extra_dict,
                        update_llm_args_with_extra_options)
 from .mpi_session import MPINodeState, MpiSession
@@ -315,11 +314,6 @@ class ModelLoader:
         if tokenizer is not None:
             tokenizer.save_pretrained(engine_dir)

-    @staticmethod
-    def get_model_format(model_dir: str) -> _ModelFormatKind:
-        ''' Get the format of the model. '''
-        return get_model_format(model_dir)
-
     def _download_hf_model(self):
         ''' Download HF model from third-party model hub like www.modelscope.cn or huggingface. '''
         model_dir = None
@@ -566,21 +560,6 @@
         # Load engine buffer from disk
         self._engine = Engine.from_dir(self._model_dir)

-    @staticmethod
-    def load_extra_build_configs_from_engine(
-            model_dir: str) -> Optional[Namespace]:
-        ''' Load the extra build configs from the engine directory, return None if model isn't an engine. '''
-        if ModelLoader.get_model_format(
-                model_dir) is not _ModelFormatKind.TLLM_ENGINE:
-            return None
-
-        with open(Path(model_dir) / "config.json", "r") as f:
-            engine_config = json.load(f)
-
-        build_config = engine_config['build_config']
-        build_config.pop("plugin_config")
-        return Namespace(**build_config)
-
     @staticmethod
     def load_hf_tokenizer(
             model_dir,
@@ -740,7 +719,8 @@ class CachedModelLoader:
                 self._hf_model_dir,
                 mapping=self.llm_args.parallel_config.to_mapping(),
                 quant_config=self.llm_args.quant_config,
-                dtype=self.llm_args.dtype)
+                dtype=self.llm_args.dtype,
+                trust_remote_code=self.llm_args.trust_remote_code)

     def _build_model(self) -> Path:
         model_format = self.llm_args.model_format
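
Together with the llm_args change above, the flag now flows from the user-facing API down to config loading. A hedged usage sketch (the model id is a placeholder; trust_remote_code was already an LLM argument, this commit only threads it further down):

from tensorrt_llm import LLM

# Opting in once at the API surface now reaches AutoConfig and
# transformers.AutoConfig.from_pretrained instead of being assumed True there.
llm = LLM(model='org/custom-architecture-model',  # placeholder model id
          trust_remote_code=True)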

View File

@@ -16,9 +16,10 @@ class AutoConfig:
                           quant_config: Optional[QuantConfig] = None,
                           **kwargs):
         import transformers
+        trust_remote_code = kwargs.get('trust_remote_code', False)
         hf_config = transformers.AutoConfig.from_pretrained(
-            hf_model_or_dir, trust_remote_code=True)
+            hf_model_or_dir, trust_remote_code=trust_remote_code)

         if hasattr(hf_config,
                    'architectures') and hf_config.architectures is not None:
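
Call sites can pass the flag explicitly; when omitted, kwargs.get() now yields False, the safer default. A hedged sketch of direct calls (the import path and checkpoint paths are assumptions, not taken from this diff):

from tensorrt_llm.models import AutoConfig  # assumed import path

# Before this change remote code was always trusted here; now it is opt-in.
cfg_local = AutoConfig.from_hugging_face('/path/to/checkpoint')  # trust_remote_code defaults to False
cfg_custom = AutoConfig.from_hugging_face('/path/to/custom-arch-model',
                                          trust_remote_code=True)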

View File

@@ -221,8 +221,11 @@ def build_from_hf(args,
             quant_output_dir.cleanup()
     else:  # fake weights
-        trtllm_config = AutoConfig.from_hugging_face(hf_model_dir, dtype,
-                                                     mapping, quant_config)
+        trtllm_config = AutoConfig.from_hugging_face(hf_model_dir,
+                                                     dtype,
+                                                     mapping,
+                                                     quant_config,
+                                                     trust_remote_code=True)
         trtllm_model = AutoModelForCausalLM.get_trtllm_model_class(
             hf_model_dir)(trtllm_config)

View File

@@ -267,7 +267,9 @@ def test_llm_with_dummy_weights(model_format):
         hf_config = transformers.AutoConfig.from_pretrained(llama_model_path)
         hf_config.save_pretrained(dummy_dir.name)
     else:
-        config = AutoConfig.from_hugging_face(llama_model_path, dtype='float16')
+        config = AutoConfig.from_hugging_face(llama_model_path,
+                                              dtype='float16',
+                                              trust_remote_code=True)
         config.to_json_file(os.path.join(dummy_dir.name, 'config.json'))
     tokenizer = transformers.AutoTokenizer.from_pretrained(llama_model_path)
     tokenizer.save_pretrained(dummy_dir.name)

View File

@@ -46,7 +46,7 @@ def test_CachedModelLoader():
     engine_dir, _ = model_loader()
     assert engine_dir
     assert engine_dir.exists() and engine_dir.is_dir()
-    model_format = ModelLoader.get_model_format(engine_dir)
+    model_format = get_model_format(engine_dir, trust_remote_code=True)
     assert model_format is _ModelFormatKind.TLLM_ENGINE
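
With the ModelLoader.get_model_format wrapper removed, tests call the free function directly. A hedged sketch of the test-side usage (import path inferred from the llm_args.py changes in this commit; the real test may import these names from elsewhere):

from tensorrt_llm.llmapi.llm_args import _ModelFormatKind, get_model_format  # assumed path


def check_engine_dir(engine_dir: str) -> None:
    # For an engine directory the format check reads TRT-LLM's own config.json,
    # so trust_remote_code only matters for HF checkpoints carrying custom code.
    fmt = get_model_format(engine_dir, trust_remote_code=True)
    assert fmt is _ModelFormatKind.TLLM_ENGINE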