From f0bd60a3951e3b765f1f98e66da7b5c1954f534b Mon Sep 17 00:00:00 2001
From: Fanrong Li <23290157+lfr-0531@users.noreply.github.com>
Date: Mon, 22 Dec 2025 10:56:33 +0800
Subject: [PATCH] [https://nvbugs/5684820][fix] fix the detokenizer issue for DeepSeek-v3.2 (#10106)

Signed-off-by: Fanrong Li <23290157+lfr-0531@users.noreply.github.com>
---
 tensorrt_llm/tokenizer/tokenizer.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorrt_llm/tokenizer/tokenizer.py b/tensorrt_llm/tokenizer/tokenizer.py
index 7e13643fb8..25c224cc24 100644
--- a/tensorrt_llm/tokenizer/tokenizer.py
+++ b/tensorrt_llm/tokenizer/tokenizer.py
@@ -213,6 +213,13 @@ class TransformersTokenizer(TokenizerBase):
         new_tokens = self.convert_ids_to_tokens(
             token_ids, skip_special_tokens=skip_special_tokens)
+        # filter out None tokens
+        if None in new_tokens:
+            logger.warning(
+                "An unexpected \"None\" token was generated. This may be caused by a generated token ID being out of the "
+                "tokenizer's vocabulary. Filtering out \"None\" tokens from the newly generated tokens."
+            )
+            new_tokens = [token for token in new_tokens if token is not None]
         pending_tokens.extend(new_tokens)
         curr_new_text = self.convert_tokens_to_string(
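
For reference, below is a minimal, illustrative sketch (not part of the patch) of the failure mode this change guards against: with Hugging Face tokenizers, convert_ids_to_tokens maps an out-of-vocabulary ID to None, and a later convert_tokens_to_string call on a list containing None typically fails. The model name ("gpt2") and the out-of-range ID are assumptions chosen only for demonstration.

# Illustrative sketch only -- not part of the patch. Assumes the Hugging Face
# `transformers` package; the model name and out-of-range ID are arbitrary.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")

# An ID past the end of the vocabulary maps to None in convert_ids_to_tokens.
token_ids = tokenizer.encode("hello world") + [tokenizer.vocab_size + 10]
new_tokens = tokenizer.convert_ids_to_tokens(token_ids)
print(new_tokens)  # e.g. ['hello', 'Ġworld', None]

# Without filtering, convert_tokens_to_string typically raises a TypeError
# when it hits the None entry; filtering it out mirrors the fix above.
if None in new_tokens:
    new_tokens = [token for token in new_tokens if token is not None]

print(tokenizer.convert_tokens_to_string(new_tokens))  # "hello world"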