diff --git a/tensorrt_llm/_torch/models/modeling_nemotron_h.py b/tensorrt_llm/_torch/models/modeling_nemotron_h.py
index ee77603ba5..e50c67991c 100644
--- a/tensorrt_llm/_torch/models/modeling_nemotron_h.py
+++ b/tensorrt_llm/_torch/models/modeling_nemotron_h.py
@@ -658,11 +658,10 @@ class NemotronHMTPDecoderLayer(NemotronHLayer):
         )
 
         if self.has_end_norm:
-            if residual is not None:
-                hidden_states = hidden_states + residual
-                residual = None
-
-            hidden_states = self.final_layernorm(hidden_states)
+            hidden_states, residual = self.final_layernorm(
+                hidden_states, residual)
+            # The last step, so don't forward the residual.
+            residual = None
 
         return hidden_states, residual
 
@@ -690,9 +689,7 @@ class NemotronHMTP(nn.Module):
 
         # Build pattern-based layers
         self.layers = nn.ModuleDict()
-        for i in range(self.pattern_len):
-            step_rel_idx = i % self.pattern_len
-
+        for step_rel_idx in range(self.pattern_len):
             char = self.pattern_str[step_rel_idx]
             is_start_of_step = step_rel_idx == 0
 
@@ -710,7 +707,7 @@ class NemotronHMTP(nn.Module):
                 skip_create_weights_in_init,
             )
 
-            self.layers[str(i)] = NemotronHMTPDecoderLayer(
+            self.layers[str(step_rel_idx)] = NemotronHMTPDecoderLayer(
                 model_config=sublayer_model_config,
                 layer_idx=self.layer_idx,
                 aux_stream_dict=aux_stream_dict,