Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-13 22:18:36 +08:00)
[nvbug/5387226] chore: add propagation for trust_remote_code to AutoConfig (#6001)
Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com>
This commit is contained in:
parent 763012a88a
commit 7568deb2f1
@@ -1286,7 +1286,8 @@ class BaseLlmArgs(BaseModel):
                 'pytorch', '_autodeploy'
         ]:
             # Load parallel_config from the engine.
-            model_format = get_model_format(self.model)
+            model_format = get_model_format(
+                self.model, trust_remote_code=self.trust_remote_code)
 
             if model_format is _ModelFormatKind.TLLM_ENGINE:
                 if self.build_config is not None:
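For context, the value that reaches `self.trust_remote_code` here is the user-facing field on `BaseLlmArgs`. A minimal usage sketch, assuming the `LLM` entry point forwards keyword arguments to `BaseLlmArgs` (the model path is a placeholder for a checkpoint that ships custom modeling code):

    from tensorrt_llm import LLM

    # Explicit opt-in: the flag lands on BaseLlmArgs.trust_remote_code and is then
    # forwarded to get_model_format(), and from there to transformers, per the hunk above.
    llm = LLM(model="path/to/custom-arch-model",  # placeholder checkpoint
              trust_remote_code=True)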
@@ -2083,7 +2084,8 @@ def update_llm_args_with_extra_options(llm_args: Dict,
     return llm_args
 
 
-def get_model_format(model_dir: str) -> _ModelFormatKind:
+def get_model_format(model_dir: str,
+                     trust_remote_code: bool = False) -> _ModelFormatKind:
     ''' Get the format of the model. '''
     if not (Path(model_dir) / 'config.json').exists():
         raise ValueError(
@@ -2102,7 +2104,8 @@ def get_model_format(model_dir: str) -> _ModelFormatKind:
             PretrainedConfig.from_checkpoint(model_dir)
         else:
             model_format = _ModelFormatKind.HF
-            AutoConfig.from_hugging_face(model_dir)
+            AutoConfig.from_hugging_face(model_dir,
+                                         trust_remote_code=trust_remote_code)
     except Exception as e:
         raise ValueError(
             f"Inferred model format {model_format}, but failed to load config.json: {e}"
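For orientation, a simplified, self-contained sketch of the detection flow that these two hunks modify. The config.json keys and the local enum are illustrative stand-ins for `_ModelFormatKind`, not the library's actual field names:

    import json
    from enum import Enum, auto
    from pathlib import Path

    import transformers

    class ModelFormat(Enum):  # illustrative stand-in for _ModelFormatKind
        TLLM_ENGINE = auto()
        TLLM_CKPT = auto()
        HF = auto()

    def sketch_get_model_format(model_dir: str,
                                trust_remote_code: bool = False) -> ModelFormat:
        """Probe config.json, then validate with the matching loader (illustrative only)."""
        config_path = Path(model_dir) / 'config.json'
        if not config_path.exists():
            raise ValueError(f"{model_dir} has no config.json")
        config = json.loads(config_path.read_text())
        if 'build_config' in config:  # engine directories carry build metadata (assumption)
            return ModelFormat.TLLM_ENGINE
        if 'pretrained_config' in config:  # TRT-LLM checkpoint layout (assumption)
            return ModelFormat.TLLM_CKPT
        # Anything else is treated as a Hugging Face checkpoint; forwarding
        # trust_remote_code lets checkpoints that ship custom modeling code validate too.
        transformers.AutoConfig.from_pretrained(model_dir,
                                                trust_remote_code=trust_remote_code)
        return ModelFormat.HF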
@@ -5,7 +5,6 @@ import shutil
 import tempfile
 import time
 import weakref
-from argparse import Namespace
 from dataclasses import asdict, dataclass, field
 from pathlib import Path
 from typing import Callable, List, Optional, Tuple, Union
@@ -35,7 +34,7 @@ from .llm_args import (CalibConfig, CudaGraphConfig, DraftTargetDecodingConfig,
                        LookaheadDecodingConfig, MedusaDecodingConfig,
                        MTPDecodingConfig, NGramDecodingConfig,
                        UserProvidedDecodingConfig, _ModelFormatKind,
-                       _ModelWrapper, _ParallelConfig, get_model_format,
+                       _ModelWrapper, _ParallelConfig,
                        update_llm_args_with_extra_dict,
                        update_llm_args_with_extra_options)
 from .mpi_session import MPINodeState, MpiSession
@@ -315,11 +314,6 @@ class ModelLoader:
         if tokenizer is not None:
             tokenizer.save_pretrained(engine_dir)
 
-    @staticmethod
-    def get_model_format(model_dir: str) -> _ModelFormatKind:
-        ''' Get the format of the model. '''
-        return get_model_format(model_dir)
-
     def _download_hf_model(self):
         ''' Download HF model from third-party model hub like www.modelscope.cn or huggingface. '''
         model_dir = None
@@ -566,21 +560,6 @@ class ModelLoader:
         # Load engine buffer from disk
         self._engine = Engine.from_dir(self._model_dir)
 
-    @staticmethod
-    def load_extra_build_configs_from_engine(
-            model_dir: str) -> Optional[Namespace]:
-        ''' Load the extra build configs from the engine directory, return None if model isn't an engine. '''
-        if ModelLoader.get_model_format(
-                model_dir) is not _ModelFormatKind.TLLM_ENGINE:
-            return None
-
-        with open(Path(model_dir) / "config.json", "r") as f:
-            engine_config = json.load(f)
-
-        build_config = engine_config['build_config']
-        build_config.pop("plugin_config")
-        return Namespace(**build_config)
-
     @staticmethod
     def load_hf_tokenizer(
         model_dir,
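Callers that still need those extra build options can read them straight from the engine directory. A minimal sketch of the equivalent of the removed helper, assuming the engine's config.json keeps its 'build_config' section (function name is ours, not the library's):

    import json
    from argparse import Namespace
    from pathlib import Path

    def read_extra_build_config(engine_dir: str) -> Namespace:
        # Same steps as the removed staticmethod: parse the engine's config.json
        # and expose the build options (minus the plugin section) as a Namespace.
        with open(Path(engine_dir) / "config.json", "r") as f:
            engine_config = json.load(f)
        build_config = engine_config["build_config"]
        build_config.pop("plugin_config", None)
        return Namespace(**build_config)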
@@ -740,7 +719,8 @@ class CachedModelLoader:
                 self._hf_model_dir,
                 mapping=self.llm_args.parallel_config.to_mapping(),
                 quant_config=self.llm_args.quant_config,
-                dtype=self.llm_args.dtype)
+                dtype=self.llm_args.dtype,
+                trust_remote_code=self.llm_args.trust_remote_code)
 
     def _build_model(self) -> Path:
         model_format = self.llm_args.model_format
@@ -16,9 +16,10 @@ class AutoConfig:
                           quant_config: Optional[QuantConfig] = None,
                           **kwargs):
         import transformers
+        trust_remote_code = kwargs.get('trust_remote_code', False)
 
         hf_config = transformers.AutoConfig.from_pretrained(
-            hf_model_or_dir, trust_remote_code=True)
+            hf_model_or_dir, trust_remote_code=trust_remote_code)
 
         if hasattr(hf_config,
                    'architectures') and hf_config.architectures is not None:
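The key detail in this hunk is the default: pulling the flag out of `**kwargs` with `False` means remote code only executes when a caller opts in explicitly, instead of the previous unconditional `trust_remote_code=True`. The same pattern in isolation (a sketch, not the library's code):

    import transformers

    def load_hf_config(hf_model_or_dir: str, **kwargs):
        # Default to False so custom modeling code from the Hub only runs
        # when the caller explicitly passes trust_remote_code=True.
        trust_remote_code = kwargs.get('trust_remote_code', False)
        return transformers.AutoConfig.from_pretrained(
            hf_model_or_dir, trust_remote_code=trust_remote_code)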
@@ -221,8 +221,11 @@ def build_from_hf(args,
         quant_output_dir.cleanup()
 
     else:  # fake weights
-        trtllm_config = AutoConfig.from_hugging_face(hf_model_dir, dtype,
-                                                     mapping, quant_config)
+        trtllm_config = AutoConfig.from_hugging_face(hf_model_dir,
+                                                     dtype,
+                                                     mapping,
+                                                     quant_config,
+                                                     trust_remote_code=True)
         trtllm_model = AutoModelForCausalLM.get_trtllm_model_class(
             hf_model_dir)(trtllm_config)
 
@@ -267,7 +267,9 @@ def test_llm_with_dummy_weights(model_format):
         hf_config = transformers.AutoConfig.from_pretrained(llama_model_path)
         hf_config.save_pretrained(dummy_dir.name)
     else:
-        config = AutoConfig.from_hugging_face(llama_model_path, dtype='float16')
+        config = AutoConfig.from_hugging_face(llama_model_path,
+                                              dtype='float16',
+                                              trust_remote_code=True)
         config.to_json_file(os.path.join(dummy_dir.name, 'config.json'))
     tokenizer = transformers.AutoTokenizer.from_pretrained(llama_model_path)
     tokenizer.save_pretrained(dummy_dir.name)
@@ -46,7 +46,7 @@ def test_CachedModelLoader():
     engine_dir, _ = model_loader()
     assert engine_dir
     assert engine_dir.exists() and engine_dir.is_dir()
-    model_format = ModelLoader.get_model_format(engine_dir)
+    model_format = get_model_format(engine_dir, trust_remote_code=True)
     assert model_format is _ModelFormatKind.TLLM_ENGINE