diff --git a/tensorrt_llm/executor/serialization.py b/tensorrt_llm/executor/serialization.py index ec24015a4b..a3bd47ea6a 100644 --- a/tensorrt_llm/executor/serialization.py +++ b/tensorrt_llm/executor/serialization.py @@ -63,7 +63,8 @@ BASE_ZMQ_CLASSES = { "FinishReason", "KvCacheConfig", "KvCacheTransferMode", "KvCacheRetentionConfig", "KvCacheRetentionConfig.TokenRangeRetentionConfig", "PeftCacheConfig", - "SchedulerConfig", "DynamicBatchConfig" + "SchedulerConfig", "DynamicBatchConfig", "ContextChunkingPolicy", + "CacheTransceiverConfig" ], "tensorrt_llm._torch.pyexecutor.config": ["PyTorchConfig"], "tensorrt_llm._torch.model_config": ["MoeLoadBalancerConfig"], @@ -108,6 +109,9 @@ BASE_ZMQ_CLASSES = { ], "torch._utils": ["_rebuild_tensor_v2"], "torch.storage": ["_load_from_bytes"], + "transformers.tokenization_utils_fast": ["PreTrainedTokenizerFast"], + "tokenizers": ["Tokenizer", "AddedToken"], + "tokenizers.models": ["Model"], } diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index fe5e703523..edc55c0c00 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -452,8 +452,6 @@ perf/test_perf.py::test_perf[qwen_14b_chat-cppmanager-exe-plugin_ifb-float16-inp perf/test_perf.py::test_perf[llama_v3.1_70b-bench-bfloat16-input_output_len:1024,1024-tp:2-gpus:2] SKIP (https://nvbugspro.nvidia.com/bug/5295411) perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128-quant:int8-gpus:2] SKIP (https://nvbugspro.nvidia.com/bug/5295411) perf/test_perf.py::test_perf[starcoder2_3b-bench-pytorch-float16-input_output_len:512,200] SKIP (https://nvbugspro.nvidia.com/bug/5295411) -test_e2e.py::test_trtllm_bench_llmapi_launch[pytorch_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5302895) -test_e2e.py::test_trtllm_bench_llmapi_launch[trt_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5302895) accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[False] SKIP (https://nvbugs/5303573) accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True] SKIP (https://nvbugs/5303573) test_e2e.py::test_openai_multi_chat_example SKIP (https://nvbugs/5236980) diff --git a/tests/unittest/llmapi/test_llm_multi_gpu.py b/tests/unittest/llmapi/test_llm_multi_gpu.py index 4058323efa..a419e18516 100644 --- a/tests/unittest/llmapi/test_llm_multi_gpu.py +++ b/tests/unittest/llmapi/test_llm_multi_gpu.py @@ -334,7 +334,6 @@ def test_llm_multi_node_pytorch(nworkers: int): @skip_single_gpu def test_llm_multi_node_with_postproc(): - pytest.skip(reason="https://nvbugs/5302891") nworkers = 2 test_case_file = os.path.join(os.path.dirname(__file__), "run_llm_with_postproc.py")