Fix: NVBug 5302895 (#4835)

Signed-off-by: Chuang Zhu <111838961+chuangz0@users.noreply.github.com>
2026-01-14 06:27:45 +08:00 · 2025-06-04 09:31:39 +08:00 · 2025-06-04 09:31:39 +08:00 · b13f8c9cba
commit b13f8c9cba
parent c835f06371
3 changed files with 5 additions and 4 deletions
--- a/tensorrt_llm/executor/serialization.py
+++ b/tensorrt_llm/executor/serialization.py
@@ -63,7 +63,8 @@ BASE_ZMQ_CLASSES = {
        "FinishReason", "KvCacheConfig", "KvCacheTransferMode",
        "KvCacheRetentionConfig",
        "KvCacheRetentionConfig.TokenRangeRetentionConfig", "PeftCacheConfig",
-        "SchedulerConfig", "DynamicBatchConfig"
+        "SchedulerConfig", "DynamicBatchConfig", "ContextChunkingPolicy",
+        "CacheTransceiverConfig"
    ],
    "tensorrt_llm._torch.pyexecutor.config": ["PyTorchConfig"],
    "tensorrt_llm._torch.model_config": ["MoeLoadBalancerConfig"],
@@ -108,6 +109,9 @@ BASE_ZMQ_CLASSES = {
    ],
    "torch._utils": ["_rebuild_tensor_v2"],
    "torch.storage": ["_load_from_bytes"],
+    "transformers.tokenization_utils_fast": ["PreTrainedTokenizerFast"],
+    "tokenizers": ["Tokenizer", "AddedToken"],
+    "tokenizers.models": ["Model"],
 }


--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -452,8 +452,6 @@ perf/test_perf.py::test_perf[qwen_14b_chat-cppmanager-exe-plugin_ifb-float16-inp
 perf/test_perf.py::test_perf[llama_v3.1_70b-bench-bfloat16-input_output_len:1024,1024-tp:2-gpus:2] SKIP (https://nvbugspro.nvidia.com/bug/5295411)
 perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128-quant:int8-gpus:2] SKIP (https://nvbugspro.nvidia.com/bug/5295411)
 perf/test_perf.py::test_perf[starcoder2_3b-bench-pytorch-float16-input_output_len:512,200] SKIP (https://nvbugspro.nvidia.com/bug/5295411)
-test_e2e.py::test_trtllm_bench_llmapi_launch[pytorch_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5302895)
-test_e2e.py::test_trtllm_bench_llmapi_launch[trt_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5302895)
 accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[False] SKIP (https://nvbugs/5303573)
 accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True] SKIP (https://nvbugs/5303573)
 test_e2e.py::test_openai_multi_chat_example SKIP (https://nvbugs/5236980)
--- a/tests/unittest/llmapi/test_llm_multi_gpu.py
+++ b/tests/unittest/llmapi/test_llm_multi_gpu.py
@@ -334,7 +334,6 @@ def test_llm_multi_node_pytorch(nworkers: int):

 @skip_single_gpu
 def test_llm_multi_node_with_postproc():
-    pytest.skip(reason="https://nvbugs/5302891")
    nworkers = 2
    test_case_file = os.path.join(os.path.dirname(__file__),
                                  "run_llm_with_postproc.py")