From ccdd8461ac690cd0152dc12a3be6589e767bf2ce Mon Sep 17 00:00:00 2001 From: Enwei Zhu <21126786+syuoni@users.noreply.github.com> Date: Thu, 22 Jan 2026 21:15:19 +0800 Subject: [PATCH] [None][fix] Always reset drafting states for GuidedDecoder (#10899) Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com> --- tensorrt_llm/_torch/pyexecutor/guided_decoder.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorrt_llm/_torch/pyexecutor/guided_decoder.py b/tensorrt_llm/_torch/pyexecutor/guided_decoder.py index 01386e55e5..610d4e1d1d 100644 --- a/tensorrt_llm/_torch/pyexecutor/guided_decoder.py +++ b/tensorrt_llm/_torch/pyexecutor/guided_decoder.py @@ -276,6 +276,7 @@ class GuidedDecoder: assert len(req.draft_tokens) == 0 self.num_advanced_draft_tokens[ slot] += self.num_advanced_tokens[slot] + except Exception as e: error_msg = f"Guided decoding error: {str(e)}" failed_requests.append((req.request_id, error_msg)) @@ -406,10 +407,9 @@ class GuidedDecoder: for req in requests.valid_requests(): slot = req.seq_slot - if self.num_advanced_draft_tokens[slot] <= 0: - continue - self.grammar_matchers[slot].rollback( - self.num_advanced_draft_tokens[slot]) + if self.num_advanced_draft_tokens[slot] > 0: + self.grammar_matchers[slot].rollback( + self.num_advanced_draft_tokens[slot]) # Reset the drafting states. self.num_advanced_draft_tokens[slot] = 0 self.is_draft_terminated[slot] = False