Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-13 22:18:36 +08:00)
[https://nvbugs/5684820][fix] fix the detokenizer issue for DeepSeek-v3.2 (#10106)
Signed-off-by: Fanrong Li <23290157+lfr-0531@users.noreply.github.com>
Parent: 066b653940
Commit: f0bd60a395
@@ -213,6 +213,13 @@ class TransformersTokenizer(TokenizerBase):
         new_tokens = self.convert_ids_to_tokens(
             token_ids, skip_special_tokens=skip_special_tokens)
+        # filter out None tokens
+        if None in new_tokens:
+            logger.warning(
+                "An unexpected \"None\" token was generated. This may be caused by a generated token ID being out of the "
+                "tokenizer's vocabulary. Filtering out \"None\" tokens from the newly generated tokens."
+            )
+            new_tokens = [token for token in new_tokens if token is not None]
         pending_tokens.extend(new_tokens)
 
         curr_new_text = self.convert_tokens_to_string(
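For context on the failure mode: Hugging Face tokenizers return None from convert_ids_to_tokens() for IDs that fall outside the vocabulary, and convert_tokens_to_string() then fails on the None entry. Below is a minimal standalone sketch of the filtering logic added by this commit; the gpt2 checkpoint and the out-of-range token ID are illustrative assumptions, not part of the commit.

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative model, not from this commit

# The last ID is deliberately outside gpt2's ~50k-entry vocabulary (assumption for demo),
# so convert_ids_to_tokens() maps it to None.
token_ids = [15496, 995, 10_000_000]
new_tokens = tokenizer.convert_ids_to_tokens(token_ids)
print(new_tokens)  # e.g. ['Hello', 'Ġworld', None]

# Same filter as the commit: drop None entries before detokenizing,
# otherwise convert_tokens_to_string() raises on the None entry.
if None in new_tokens:
    new_tokens = [token for token in new_tokens if token is not None]

print(tokenizer.convert_tokens_to_string(new_tokens))  # 'Hello world'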