Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-14 06:27:45 +08:00)
[https://nvbugs/5451426][fix] Avoid torch compile on full eagle3 worker (#7245)
Signed-off-by: Jin Li <59594262+liji-nv@users.noreply.github.com>
This commit is contained in:
parent: 201fd257cc
commit: 877e1f44d3
@@ -266,7 +266,6 @@ class Eagle3OneModelWorker(nn.Module):
         self.max_draft_len = self.spec_config.max_draft_len
         self.mapping = mapping
 
-    @torch.compile(options={"max-autotune": True})
     def forward(self, input_ids, position_ids, hidden_states, logits,
                 attn_metadata, spec_metadata, draft_model):
         batch_size = attn_metadata.num_seqs
Loading…
Reference in New Issue
Block a user