mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
[https://nvbugs/5772521][fix] Fix draft token tree chain crash (#10386)
Signed-off-by: Mike Iovine <6158008+mikeiovine@users.noreply.github.com>
This commit is contained in:
parent
e98c27ee4f
commit
bedfff4f00
@@ -1298,14 +1298,13 @@ class TorchSampler(Sampler, AsyncWorkerMixin):
 # Take the longest accepted path as the next new token.
 num_accepted_draft_tokens = 0
 for idx in eagle_paths[longest_match_path_idx][:longest_accepted_len]:
-    add_token(
-        request, new_tokens_list, beam_idx=DEFAULT_BEAM_IDX, step=cast(int, idx.item())
-    )
+    step = cast(int, idx.item())
+    add_token(request, new_tokens_list, beam_idx=DEFAULT_BEAM_IDX, step=step)
     num_accepted_draft_tokens += 1
     if self.finish_if_reason(
         request,
         finish_reasons,
-        step=num_accepted_draft_tokens,
+        step=step,
         beam_idx=DEFAULT_BEAM_IDX,
     ):
         break
Loading…
Reference in New Issue
Block a user