[https://nvbugs/5463720][fix] tp-split the inferred mlp_hidden_size for nemotron-nas (#7231)

Signed-off-by: Venky Ganesh <23023424+venkywonka@users.noreply.github.com>
This commit is contained in:
Venky 2025-08-27 05:04:42 -07:00 committed by GitHub
parent 0fa49c5e2b
commit 6cc168a5d3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -344,7 +344,8 @@ class ModelConfig(Generic[TConfig]):
architectures = self.pretrained_config.architectures
if len(architectures
) == 1 and architectures[0] == "DeciLMForCausalLM":
- mlp_hidden_size = self._infer_nemotron_ffn_mult()
+ mlp_hidden_size = self._infer_nemotron_ffn_mult(
+ ) // self.mapping.tp_size
else:
raise ValueError(
f"Inferring mlp hidden size for model architecture: {architectures} isn't supported yet"