[fix] Fix Llama 3.3 70b EAGLE (#4772)

Signed-off-by: Mike Iovine <6158008+mikeiovine@users.noreply.github.com>
2026-01-13 22:18:36 +08:00 · 2025-05-30 10:08:08 -04:00 · 2025-05-30 10:08:08 -04:00 · 8cb6163a57
commit 8cb6163a57
parent 49f2f1f8eb
1 changed files with 1 additions and 1 deletions
--- a/tensorrt_llm/_torch/models/modeling_llama.py
+++ b/tensorrt_llm/_torch/models/modeling_llama.py
@@ -1307,7 +1307,7 @@ class Eagle3LlamaForCausalLM(DecoderModelForCausalLM[Eagle3LlamaDraftModel,
                                num_tokens: int) -> Dict[str, Any]:

        hidden_states = torch.empty(batch_size * num_tokens,
-                                    self.model.hidden_size_in,
+                                    self.model.hidden_size,
                                    dtype=self.model.dtype,
                                    device='cuda')