mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[https://nvbugs/5463720][fix] tp-split the inferred mlp_hidden_size for nemotron-nas (#7231)
Signed-off-by: Venky Ganesh <23023424+venkywonka@users.noreply.github.com>
This commit is contained in:
parent
0fa49c5e2b
commit
6cc168a5d3
@@ -344,7 +344,8 @@ class ModelConfig(Generic[TConfig]):
|
||||
architectures = self.pretrained_config.architectures
|
||||
if len(architectures
|
||||
) == 1 and architectures[0] == "DeciLMForCausalLM":
|
||||
- mlp_hidden_size = self._infer_nemotron_ffn_mult()
|
||||
+ mlp_hidden_size = self._infer_nemotron_ffn_mult(
|
||||
+ ) // self.mapping.tp_size
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Inferring mlp hidden size for model architecture: {architectures} isn't supported yet"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user