From 6cc168a5d3190fbfc8000e20af08692508eb0c28 Mon Sep 17 00:00:00 2001 From: Venky <23023424+venkywonka@users.noreply.github.com> Date: Wed, 27 Aug 2025 05:04:42 -0700 Subject: [PATCH] [https://nvbugs/5463720][fix] tp-split the inferred `mlp_hidden_size` for nemotron-nas (#7231) Signed-off-by: Venky Ganesh <23023424+venkywonka@users.noreply.github.com> --- tensorrt_llm/_torch/model_config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorrt_llm/_torch/model_config.py b/tensorrt_llm/_torch/model_config.py index 232d2ccecd..2ab19e1e78 100644 --- a/tensorrt_llm/_torch/model_config.py +++ b/tensorrt_llm/_torch/model_config.py @@ -344,7 +344,8 @@ class ModelConfig(Generic[TConfig]): architectures = self.pretrained_config.architectures if len(architectures ) == 1 and architectures[0] == "DeciLMForCausalLM": - mlp_hidden_size = self._infer_nemotron_ffn_mult() + mlp_hidden_size = self._infer_nemotron_ffn_mult( + ) // self.mapping.tp_size else: raise ValueError( f"Inferring mlp hidden size for model architecture: {architectures} isn't supported yet"