mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
[fix] Fix Llama 3.3 70b EAGLE (#4772)
Signed-off-by: Mike Iovine <6158008+mikeiovine@users.noreply.github.com>
This commit is contained in:
parent
49f2f1f8eb
commit
8cb6163a57
@@ -1307,7 +1307,7 @@ class Eagle3LlamaForCausalLM(DecoderModelForCausalLM[Eagle3LlamaDraftModel,
                                 num_tokens: int) -> Dict[str, Any]:
 
         hidden_states = torch.empty(batch_size * num_tokens,
-                                    self.model.hidden_size_in,
+                                    self.model.hidden_size,
                                     dtype=self.model.dtype,
                                     device='cuda')
 
Loading…
Reference in New Issue
Block a user