Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-13 22:18:36 +08:00)
[nvbug/5387226] chore: add propagation for trust_remote_code to AutoConfig (#6001)
Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com>
This commit is contained in:
parent 763012a88a
commit 7568deb2f1
@@ -1286,7 +1286,8 @@ class BaseLlmArgs(BaseModel):
                 'pytorch', '_autodeploy'
         ]:
             # Load parallel_config from the engine.
-            model_format = get_model_format(self.model)
+            model_format = get_model_format(
+                self.model, trust_remote_code=self.trust_remote_code)
 
             if model_format is _ModelFormatKind.TLLM_ENGINE:
                 if self.build_config is not None:
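For context, the value that reaches `self.trust_remote_code` here is the user-facing field on `BaseLlmArgs`. A minimal usage sketch, assuming the `LLM` entry point forwards keyword arguments to `BaseLlmArgs` (the model path is a placeholder for a checkpoint that ships custom modeling code):

    from tensorrt_llm import LLM

    # Explicit opt-in: the flag lands on BaseLlmArgs.trust_remote_code and is then
    # forwarded to get_model_format(), and from there to transformers, per the hunk above.
    llm = LLM(model="path/to/custom-arch-model",  # placeholder checkpoint
              trust_remote_code=True)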
@@ -2083,7 +2084,8 @@ def update_llm_args_with_extra_options(llm_args: Dict,
     return llm_args
 
 
-def get_model_format(model_dir: str) -> _ModelFormatKind:
+def get_model_format(model_dir: str,
+                     trust_remote_code: bool = False) -> _ModelFormatKind:
     ''' Get the format of the model. '''
     if not (Path(model_dir) / 'config.json').exists():
         raise ValueError(
@@ -2102,7 +2104,8 @@ def get_model_format(model_dir: str) -> _ModelFormatKind:
             PretrainedConfig.from_checkpoint(model_dir)
         else:
             model_format = _ModelFormatKind.HF
-            AutoConfig.from_hugging_face(model_dir)
+            AutoConfig.from_hugging_face(model_dir,
+                                         trust_remote_code=trust_remote_code)
     except Exception as e:
         raise ValueError(
             f"Inferred model format {model_format}, but failed to load config.json: {e}"
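For orientation, a simplified, self-contained sketch of the detection flow that these two hunks modify. The config.json keys and the local enum are illustrative stand-ins for `_ModelFormatKind`, not the library's actual field names:

    import json
    from enum import Enum, auto
    from pathlib import Path

    import transformers

    class ModelFormat(Enum):  # illustrative stand-in for _ModelFormatKind
        TLLM_ENGINE = auto()
        TLLM_CKPT = auto()
        HF = auto()

    def sketch_get_model_format(model_dir: str,
                                trust_remote_code: bool = False) -> ModelFormat:
        """Probe config.json, then validate with the matching loader (illustrative only)."""
        config_path = Path(model_dir) / 'config.json'
        if not config_path.exists():
            raise ValueError(f"{model_dir} has no config.json")
        config = json.loads(config_path.read_text())
        if 'build_config' in config:  # engine directories carry build metadata (assumption)
            return ModelFormat.TLLM_ENGINE
        if 'pretrained_config' in config:  # TRT-LLM checkpoint layout (assumption)
            return ModelFormat.TLLM_CKPT
        # Anything else is treated as a Hugging Face checkpoint; forwarding
        # trust_remote_code lets checkpoints that ship custom modeling code validate too.
        transformers.AutoConfig.from_pretrained(model_dir,
                                                trust_remote_code=trust_remote_code)
        return ModelFormat.HF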
@@ -5,7 +5,6 @@ import shutil
 import tempfile
 import time
 import weakref
-from argparse import Namespace
 from dataclasses import asdict, dataclass, field
 from pathlib import Path
 from typing import Callable, List, Optional, Tuple, Union
@@ -35,7 +34,7 @@ from .llm_args import (CalibConfig, CudaGraphConfig, DraftTargetDecodingConfig,
                        LookaheadDecodingConfig, MedusaDecodingConfig,
                        MTPDecodingConfig, NGramDecodingConfig,
                        UserProvidedDecodingConfig, _ModelFormatKind,
-                       _ModelWrapper, _ParallelConfig, get_model_format,
+                       _ModelWrapper, _ParallelConfig,
                        update_llm_args_with_extra_dict,
                        update_llm_args_with_extra_options)
 from .mpi_session import MPINodeState, MpiSession
@@ -315,11 +314,6 @@ class ModelLoader:
         if tokenizer is not None:
             tokenizer.save_pretrained(engine_dir)
 
-    @staticmethod
-    def get_model_format(model_dir: str) -> _ModelFormatKind:
-        ''' Get the format of the model. '''
-        return get_model_format(model_dir)
-
     def _download_hf_model(self):
         ''' Download HF model from third-party model hub like www.modelscope.cn or huggingface. '''
         model_dir = None
@@ -566,21 +560,6 @@ class ModelLoader:
         # Load engine buffer from disk
         self._engine = Engine.from_dir(self._model_dir)
 
-    @staticmethod
-    def load_extra_build_configs_from_engine(
-            model_dir: str) -> Optional[Namespace]:
-        ''' Load the extra build configs from the engine directory, return None if model isn't an engine. '''
-        if ModelLoader.get_model_format(
-                model_dir) is not _ModelFormatKind.TLLM_ENGINE:
-            return None
-
-        with open(Path(model_dir) / "config.json", "r") as f:
-            engine_config = json.load(f)
-
-        build_config = engine_config['build_config']
-        build_config.pop("plugin_config")
-        return Namespace(**build_config)
-
     @staticmethod
     def load_hf_tokenizer(
         model_dir,
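Callers that still need those extra build options can read them straight from the engine directory. A minimal sketch of the equivalent of the removed helper, assuming the engine's config.json keeps its 'build_config' section (function name is ours, not the library's):

    import json
    from argparse import Namespace
    from pathlib import Path

    def read_extra_build_config(engine_dir: str) -> Namespace:
        # Same steps as the removed staticmethod: parse the engine's config.json
        # and expose the build options (minus the plugin section) as a Namespace.
        with open(Path(engine_dir) / "config.json", "r") as f:
            engine_config = json.load(f)
        build_config = engine_config["build_config"]
        build_config.pop("plugin_config", None)
        return Namespace(**build_config)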
@@ -740,7 +719,8 @@ class CachedModelLoader:
                 self._hf_model_dir,
                 mapping=self.llm_args.parallel_config.to_mapping(),
                 quant_config=self.llm_args.quant_config,
-                dtype=self.llm_args.dtype)
+                dtype=self.llm_args.dtype,
+                trust_remote_code=self.llm_args.trust_remote_code)
 
     def _build_model(self) -> Path:
         model_format = self.llm_args.model_format
@@ -16,9 +16,10 @@ class AutoConfig:
                           quant_config: Optional[QuantConfig] = None,
                           **kwargs):
         import transformers
+        trust_remote_code = kwargs.get('trust_remote_code', False)
 
         hf_config = transformers.AutoConfig.from_pretrained(
-            hf_model_or_dir, trust_remote_code=True)
+            hf_model_or_dir, trust_remote_code=trust_remote_code)
 
         if hasattr(hf_config,
                    'architectures') and hf_config.architectures is not None:
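The key detail in this hunk is the default: pulling the flag out of `**kwargs` with `False` means remote code only executes when a caller opts in explicitly, instead of the previous unconditional `trust_remote_code=True`. The same pattern in isolation (a sketch, not the library's code):

    import transformers

    def load_hf_config(hf_model_or_dir: str, **kwargs):
        # Default to False so custom modeling code from the Hub only runs
        # when the caller explicitly passes trust_remote_code=True.
        trust_remote_code = kwargs.get('trust_remote_code', False)
        return transformers.AutoConfig.from_pretrained(
            hf_model_or_dir, trust_remote_code=trust_remote_code)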
@@ -221,8 +221,11 @@ def build_from_hf(args,
         quant_output_dir.cleanup()
 
     else:  # fake weights
-        trtllm_config = AutoConfig.from_hugging_face(hf_model_dir, dtype,
-                                                     mapping, quant_config)
+        trtllm_config = AutoConfig.from_hugging_face(hf_model_dir,
+                                                     dtype,
+                                                     mapping,
+                                                     quant_config,
+                                                     trust_remote_code=True)
         trtllm_model = AutoModelForCausalLM.get_trtllm_model_class(
             hf_model_dir)(trtllm_config)
 
@@ -267,7 +267,9 @@ def test_llm_with_dummy_weights(model_format):
         hf_config = transformers.AutoConfig.from_pretrained(llama_model_path)
         hf_config.save_pretrained(dummy_dir.name)
     else:
-        config = AutoConfig.from_hugging_face(llama_model_path, dtype='float16')
+        config = AutoConfig.from_hugging_face(llama_model_path,
+                                              dtype='float16',
+                                              trust_remote_code=True)
         config.to_json_file(os.path.join(dummy_dir.name, 'config.json'))
     tokenizer = transformers.AutoTokenizer.from_pretrained(llama_model_path)
     tokenizer.save_pretrained(dummy_dir.name)
@@ -46,7 +46,7 @@ def test_CachedModelLoader():
     engine_dir, _ = model_loader()
     assert engine_dir
     assert engine_dir.exists() and engine_dir.is_dir()
-    model_format = ModelLoader.get_model_format(engine_dir)
+    model_format = get_model_format(engine_dir, trust_remote_code=True)
     assert model_format is _ModelFormatKind.TLLM_ENGINE